The following patches implement a more generalized infrastructure (than
latency.c) for connecting drivers and subsystem's that could implement
power performance optimizations with the data needed to implement such
policies.
These patches are following up on the discussions and presentations at
the power management summit last summer.
The idea is that from an abstract point of view how much to throttle
hardware can be expressed as a function of QoS types of parameters;
Latency, throughput, and idle time outs.
The qos_parameter patch is intended to put into place the registration
and notification infrastructure for enabling QoS based policy choices by
drivers, where constraints on throttling are communicated through the
qos_params module.
Note: it implements a user mode registration protocol where user mode
QoS dependents open a misc device node and write there constraints. As
long as the file is held open the QoS constraint is included, when the
file closes this constraint is removed and a new constraint is computed.
I would like some feed back on the idea, implementation and possible
applications of this concept we could work on.
The current patch set is 2 patches.
1) qos_param : implements the infrastructure and user mode interface
2) no latency : removes latency.c from the kernel tree replacing it with
qos_param use.
I have a 3rd patch that's a hack application of this qos interface for
communicating latency constraints to e1000.c for tweaking the interrupt
processing of the e1000 based on acceptable latency gathered from the
infrastructure. I'm not proud of this last one but it helps express the
idea.
Also there is some more documentation and commentary (and an older
patch set) on
http://www.lesswatts.org/projects/power-qos/
thanks,
--mgross
The following is the qos_param patch that implements a genralization of
latency.c.
Signed-off-by: Mark Gross <[email protected]>
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/include/linux/qos_params.h linux-2.6.23-rc8-qos/include/linux/qos_params.h
--- linux-2.6.23-rc8/include/linux/qos_params.h 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.23-rc8-qos/include/linux/qos_params.h 2007-09-26 14:05:20.000000000 -0700
@@ -0,0 +1,35 @@
+/* interface for the qos_power infrastructure of the linux kernel.
+ *
+ * Mark Gross
+ */
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <linux/miscdevice.h>
+
+struct requirement_list {
+ struct list_head list;
+ union {
+ s32 value;
+ s32 usec;
+ s32 kbps;
+ };
+ char *name;
+};
+
+#define QOS_RESERVED 0
+#define QOS_CPU_DMA_LATENCY 1
+#define QOS_NETWORK_LATENCY 2
+#define QOS_NETWORK_THROUGHPUT 3
+
+#define QOS_NUM_CLASSES 4
+#define QOS_DEFAULT_VALUE -1
+
+int qos_add_requirement(int qos, char *name, s32 value);
+int qos_update_requirement(int qos, char *name, s32 new_value);
+void qos_remove_requirement(int qos, char *name);
+
+int qos_requirement(int qos);
+
+int qos_add_notifier(int qos, struct notifier_block *notifier);
+int qos_remove_notifier(int qos, struct notifier_block *notifier);
+
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/kernel/Makefile linux-2.6.23-rc8-qos/kernel/Makefile
--- linux-2.6.23-rc8/kernel/Makefile 2007-09-26 13:54:54.000000000 -0700
+++ linux-2.6.23-rc8-qos/kernel/Makefile 2007-09-26 14:06:38.000000000 -0700
@@ -9,7 +9,7 @@
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
- utsname.o
+ utsname.o qos_params.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/kernel/qos_params.c linux-2.6.23-rc8-qos/kernel/qos_params.c
--- linux-2.6.23-rc8/kernel/qos_params.c 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.23-rc8-qos/kernel/qos_params.c 2007-09-26 15:21:51.000000000 -0700
@@ -0,0 +1,354 @@
+/*
+ * This module exposes the interface to kernel space for specifying
+ * QoS dependencies. It provides infrastructure for registration of:
+ *
+ * Dependents on a QoS value : register requirements
+ * Watchers of QoS value : get notified when target QoS value changes
+ *
+ * This QoS design is best effort based. Dependents register their QoS needs.
+ * Watchers register to keep track of the current QoS needs of the system.
+ *
+ * There are 3 basic classes of QoS parameter: latency, timeout, throughput
+ * each have defined units:
+ * latency: usec
+ * timeout: usec <-- currently not used.
+ * throughput: kbs (kilo byte / sec)
+ *
+ * There are lists of qos_objects each one wrapping requirements, notifiers
+ *
+ * User mode requirements on a QOS parameter register themselves to the
+ * subsystem by opening the device node /dev/... and writing there request to
+ * the node. As long as the process holds a file handle open to the node the
+ * client continues to be accounted for. Upon file release the usermode
+ * requirement is removed and a new qos target is computed. This way when the
+ * requirement that the application has is cleaned up when closes the file
+ * pointer or exits the qos_object will get an opportunity to clean up.
+ *
+ * mark gross [email protected]
+ */
+
+#include <linux/qos_params.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/string.h>
+#include <linux/platform_device.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * locking rule: all changes to target_value or requirements or notifiers lists or
+ * qos_object list and qos_objects need to happen with qos_lock held, taken
+ * with _irqsave. One lock to rule them all
+ */
+
+struct qos_object {
+ struct requirement_list requirements;
+ struct srcu_notifier_head notifiers;
+ struct miscdevice qos_power_miscdev;
+ char * name;
+ s32 default_value;
+ s32 target_value;
+ s32 (* comparitor)(s32,s32);
+};
+static struct qos_object qos_array[QOS_NUM_CLASSES];
+static DEFINE_SPINLOCK(qos_lock);
+
+static ssize_t qos_power_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *f_pos);
+static int qos_power_open(struct inode *inode, struct file *filp);
+static int qos_power_release(struct inode *inode, struct file *filp);
+
+static const struct file_operations qos_power_fops = {
+ .write = qos_power_write,
+ .open = qos_power_open,
+ .release = qos_power_release,
+};
+
+/* static helper functions */
+static s32 max_compare(s32 v1, s32 v2)
+{
+ if (v1 < v2)
+ return v2;
+ else
+ return v1;
+}
+
+static s32 min_compare(s32 v1, s32 v2)
+{
+ if (v1 < v2)
+ return v1;
+ else
+ return v2;
+}
+
+/* assumes qos_lock is held */
+static void update_target(int i)
+{
+ s32 extreme_value;
+ struct requirement_list *node;
+
+ extreme_value = qos_array[i].default_value;
+ list_for_each_entry(node, &qos_array[i].requirements.list, list) {
+ extreme_value = qos_array[i].comparitor(
+ extreme_value, node->value);
+ }
+ if (qos_array[i].target_value != extreme_value) {
+ qos_array[i].target_value = extreme_value;
+ printk(KERN_ERR "new target for qos %d is %d\n",i,
+ qos_array[i].target_value);
+ srcu_notifier_call_chain(&qos_array[i].notifiers,
+ (unsigned long) qos_array[i].target_value, NULL);
+ }
+}
+
+static int register_new_qos_misc(struct qos_object *qos)
+{
+ int ret;
+
+ qos->qos_power_miscdev.minor = MISC_DYNAMIC_MINOR;
+ qos->qos_power_miscdev.name = qos->name;
+ qos->qos_power_miscdev.fops = &qos_power_fops;
+
+ ret = misc_register(& qos->qos_power_miscdev);
+ if (ret < 0 )
+ printk(KERN_ERR "qos_power: misc_register returns %d.\n", ret);
+
+ return ret;
+}
+
+
+/* constructors */
+static void init_qos_object(int i, const char * name, s32 default_value,
+ s32 (* comparitor)(s32,s32))
+{
+ struct qos_object *qos = NULL;
+
+ if( i < QOS_NUM_CLASSES) {
+ qos = &qos_array[i];
+ qos->name = kstrdup(name, GFP_KERNEL);
+ if(!qos->name)
+ goto cleanup;
+
+ qos->default_value = default_value;
+ qos->target_value = default_value;
+ qos->comparitor = comparitor;
+ srcu_init_notifier_head(&qos->notifiers);
+ INIT_LIST_HEAD(&qos->requirements.list);
+ if (register_new_qos_misc(qos) < 0 )
+ goto cleanup;
+ }
+ return;
+cleanup:
+ kfree(qos->name);
+}
+
+static int find_qos_object_by_minor(int minor)
+{
+ int i;
+
+ for (i=0;i<QOS_NUM_CLASSES; i++) {
+ if (minor == qos_array[i].qos_power_miscdev.minor)
+ return i;
+ }
+ return -1;
+}
+
+static void new_latency_qos(int i, const char *name)
+{
+ init_qos_object(i, name, 2000 * USEC_PER_SEC, min_compare);
+ /* 2000 sec is about infinite */
+}
+
+static void new_throughput_qos(int i, const char *name)
+{
+ init_qos_object(i, name, 0, max_compare);
+}
+
+/* accessors: */
+
+int qos_requirement(int i)
+{
+ int ret_val;
+ unsigned long flags;
+ spin_lock_irqsave(&qos_lock, flags);
+
+ ret_val = qos_array[i].target_value;
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return ret_val;
+}
+EXPORT_SYMBOL_GPL(qos_requirement);
+
+
+int qos_add_requirement(int i, char *name, s32 value)
+{
+ struct requirement_list * dep;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL);
+ if (dep) {
+ if (value == QOS_DEFAULT_VALUE)
+ dep->value = qos_array[i].default_value;
+ else
+ dep->value = value;
+ dep->name = kstrdup(name, GFP_KERNEL);
+ if(!dep->name)
+ goto cleanup;
+
+ list_add(&dep->list, &qos_array[i].requirements.list);
+ update_target(i);
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return 0;
+ }
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+cleanup:
+ if(dep)
+ kfree(dep);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(qos_add_requirement);
+
+
+int qos_update_requirement(int i, char *name, s32 new_value)
+{
+ unsigned long flags;
+ struct requirement_list *node;
+ int pending_update = 0;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ list_for_each_entry(node, &qos_array[i].requirements.list, list) {
+ if( strcmp(node->name, name) == 0) {
+ if (new_value == QOS_DEFAULT_VALUE)
+ node->value = qos_array[i].default_value;
+ else
+ node->value = new_value;
+ pending_update = 1;
+ break;
+ }
+ }
+ if(pending_update)
+ update_target(i);
+
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(qos_update_requirement);
+
+void qos_remove_requirement(int i, char *name)
+{
+ unsigned long flags;
+ struct requirement_list *node;
+ int pending_update = 0;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ list_for_each_entry(node, &qos_array[i].requirements.list, list) {
+ if( strcmp(node->name, name) == 0) {
+ kfree(node->name);
+ list_del(&node->list);
+ kfree(node);
+ pending_update = 1;
+ break;
+ }
+ }
+ if(pending_update)
+ update_target(i);
+ spin_unlock_irqrestore(&qos_lock, flags);
+}
+EXPORT_SYMBOL_GPL(qos_remove_requirement);
+
+int qos_add_notifier(int i, struct notifier_block *notifier)
+{
+ unsigned long flags;
+ int retval;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ retval = srcu_notifier_chain_register(&qos_array[i].notifiers, notifier);
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(qos_add_notifier);
+
+int qos_remove_notifier(int i, struct notifier_block *notifier)
+{
+ unsigned long flags;
+ int retval;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ retval = srcu_notifier_chain_unregister(&qos_array[i].notifiers, notifier);
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(qos_remove_notifier);
+
+#define PID_NAME_LEN sizeof("process_1234567890")
+static char name[PID_NAME_LEN];
+
+static int qos_power_open(struct inode *inode, struct file *filp)
+{
+ int ret;
+ int i;
+
+ i = find_qos_object_by_minor(iminor(inode));
+ if (i >= 0) {
+ filp->private_data = (void *) i;
+ sprintf(name, "process_%d", current->pid);
+ ret = qos_add_requirement(i, name, QOS_DEFAULT_VALUE);
+ if (ret >= 0)
+ return 0;
+ }
+
+ return -EPERM;
+}
+
+static int qos_power_release(struct inode *inode, struct file *filp)
+{
+ int qos;
+
+ qos = (int) filp->private_data;
+ sprintf(name, "process_%d", current->pid);
+ qos_remove_requirement(qos, name);
+
+ return 0;
+}
+
+static ssize_t qos_power_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ s32 value;
+ int qos;
+
+ qos = (int) filp->private_data;
+ if (count != sizeof(s32))
+ return -EINVAL;
+ if (copy_from_user(&value, buf, sizeof(s32))) {
+ return -EFAULT;
+ }
+ sprintf(name, "process_%d", current->pid);
+ qos_update_requirement(qos, name, value);
+
+ return sizeof(s32);
+}
+
+
+static int __init qos_power_init(void)
+{
+ new_latency_qos(QOS_CPU_DMA_LATENCY, "cpu_dma_latency");
+ new_latency_qos(QOS_NETWORK_LATENCY, "network_latency");
+ new_throughput_qos(QOS_NETWORK_THROUGHPUT, "network_throughput");
+
+ return 0;
+}
+
+late_initcall(qos_power_init);
The following patch replaces latency.c with qos_params.c and fixes up
users of latency to use qos_params
Signed-off-by: Mark Gross <[email protected]>
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8-qos/drivers/acpi/processor_idle.c linux-2.6.23-rc8-qos-nolatency.c/drivers/acpi/processor_idle.c
--- linux-2.6.23-rc8-qos/drivers/acpi/processor_idle.c 2007-09-26 13:54:28.000000000 -0700
+++ linux-2.6.23-rc8-qos-nolatency.c/drivers/acpi/processor_idle.c 2007-09-26 14:09:27.000000000 -0700
@@ -38,7 +38,7 @@
#include <linux/dmi.h>
#include <linux/moduleparam.h>
#include <linux/sched.h> /* need_resched() */
-#include <linux/latency.h>
+#include <linux/qos_params.h>
#include <linux/clockchips.h>
/*
@@ -605,7 +605,7 @@
if (cx->promotion.state &&
((cx->promotion.state - pr->power.states) <= max_cstate)) {
if (sleep_ticks > cx->promotion.threshold.ticks &&
- cx->promotion.state->latency <= system_latency_constraint()) {
+ cx->promotion.state->latency <= qos_requirement(QOS_CPU_DMA_LATENCY)) {
cx->promotion.count++;
cx->demotion.count = 0;
if (cx->promotion.count >=
@@ -649,7 +649,7 @@
* or if the latency of the current state is unacceptable
*/
if ((pr->power.state - pr->power.states) > max_cstate ||
- pr->power.state->latency > system_latency_constraint()) {
+ pr->power.state->latency > qos_requirement(QOS_CPU_DMA_LATENCY)) {
if (cx->demotion.state)
next_state = cx->demotion.state;
}
@@ -1173,7 +1173,7 @@
"maximum allowed latency: %d usec\n",
pr->power.state ? pr->power.state - pr->power.states : 0,
max_cstate, (unsigned)pr->power.bm_activity,
- system_latency_constraint());
+ qos_requirement(QOS_CPU_DMA_LATENCY));
seq_puts(seq, "states:\n");
@@ -1280,7 +1280,7 @@
max_cstate);
first_run++;
#ifdef CONFIG_SMP
- register_latency_notifier(&acpi_processor_latency_notifier);
+ qos_add_notifier(QOS_CPU_DMA_LATENCY, &acpi_processor_latency_notifier);
#endif
}
@@ -1354,7 +1354,7 @@
*/
cpu_idle_wait();
#ifdef CONFIG_SMP
- unregister_latency_notifier(&acpi_processor_latency_notifier);
+ qos_remove_notifier(QOS_CPU_DMA_LATENCY, &acpi_processor_latency_notifier);
#endif
}
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8-qos/drivers/net/wireless/ipw2100.c linux-2.6.23-rc8-qos-nolatency.c/drivers/net/wireless/ipw2100.c
--- linux-2.6.23-rc8-qos/drivers/net/wireless/ipw2100.c 2007-09-26 13:54:34.000000000 -0700
+++ linux-2.6.23-rc8-qos-nolatency.c/drivers/net/wireless/ipw2100.c 2007-09-26 14:09:27.000000000 -0700
@@ -162,7 +162,7 @@
#include <linux/firmware.h>
#include <linux/acpi.h>
#include <linux/ctype.h>
-#include <linux/latency.h>
+#include <linux/qos_params.h>
#include "ipw2100.h"
@@ -1701,7 +1701,7 @@
/* the ipw2100 hardware really doesn't want power management delays
* longer than 175usec
*/
- modify_acceptable_latency("ipw2100", 175);
+ qos_update_requirement(QOS_CPU_DMA_LATENCY, "ipw2100", 175);
/* If the interrupt is enabled, turn it off... */
spin_lock_irqsave(&priv->low_lock, flags);
@@ -1856,7 +1856,7 @@
ipw2100_disable_interrupts(priv);
spin_unlock_irqrestore(&priv->low_lock, flags);
- modify_acceptable_latency("ipw2100", INFINITE_LATENCY);
+ qos_update_requirement(QOS_CPU_DMA_LATENCY, "ipw2100", QOS_DEFAULT_VALUE);
#ifdef ACPI_CSTATE_LIMIT_DEFINED
if (priv->config & CFG_C3_DISABLED) {
@@ -6544,7 +6544,7 @@
if (ret)
goto out;
- set_acceptable_latency("ipw2100", INFINITE_LATENCY);
+ qos_add_requirement(QOS_CPU_DMA_LATENCY, "ipw2100", QOS_DEFAULT_VALUE);
#ifdef CONFIG_IPW2100_DEBUG
ipw2100_debug_level = debug;
ret = driver_create_file(&ipw2100_pci_driver.driver,
@@ -6566,7 +6566,7 @@
&driver_attr_debug_level);
#endif
pci_unregister_driver(&ipw2100_pci_driver);
- remove_acceptable_latency("ipw2100");
+ qos_remove_requirement(QOS_CPU_DMA_LATENCY, "ipw2100");
}
module_init(ipw2100_init);
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8-qos/include/linux/latency.h linux-2.6.23-rc8-qos-nolatency.c/include/linux/latency.h
--- linux-2.6.23-rc8-qos/include/linux/latency.h 2007-07-08 16:32:17.000000000 -0700
+++ linux-2.6.23-rc8-qos-nolatency.c/include/linux/latency.h 1969-12-31 16:00:00.000000000 -0800
@@ -1,25 +0,0 @@
-/*
- * latency.h: Explicit system-wide latency-expectation infrastructure
- *
- * (C) Copyright 2006 Intel Corporation
- * Author: Arjan van de Ven <[email protected]>
- *
- */
-
-#ifndef _INCLUDE_GUARD_LATENCY_H_
-#define _INCLUDE_GUARD_LATENCY_H_
-
-#include <linux/notifier.h>
-
-void set_acceptable_latency(char *identifier, int usecs);
-void modify_acceptable_latency(char *identifier, int usecs);
-void remove_acceptable_latency(char *identifier);
-void synchronize_acceptable_latency(void);
-int system_latency_constraint(void);
-
-int register_latency_notifier(struct notifier_block * nb);
-int unregister_latency_notifier(struct notifier_block * nb);
-
-#define INFINITE_LATENCY 1000000
-
-#endif
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8-qos/kernel/latency.c linux-2.6.23-rc8-qos-nolatency.c/kernel/latency.c
--- linux-2.6.23-rc8-qos/kernel/latency.c 2007-07-08 16:32:17.000000000 -0700
+++ linux-2.6.23-rc8-qos-nolatency.c/kernel/latency.c 1969-12-31 16:00:00.000000000 -0800
@@ -1,280 +0,0 @@
-/*
- * latency.c: Explicit system-wide latency-expectation infrastructure
- *
- * The purpose of this infrastructure is to allow device drivers to set
- * latency constraint they have and to collect and summarize these
- * expectations globally. The cummulated result can then be used by
- * power management and similar users to make decisions that have
- * tradoffs with a latency component.
- *
- * An example user of this are the x86 C-states; each higher C state saves
- * more power, but has a higher exit latency. For the idle loop power
- * code to make a good decision which C-state to use, information about
- * acceptable latencies is required.
- *
- * An example announcer of latency is an audio driver that knowns it
- * will get an interrupt when the hardware has 200 usec of samples
- * left in the DMA buffer; in that case the driver can set a latency
- * constraint of, say, 150 usec.
- *
- * Multiple drivers can each announce their maximum accepted latency,
- * to keep these appart, a string based identifier is used.
- *
- *
- * (C) Copyright 2006 Intel Corporation
- * Author: Arjan van de Ven <[email protected]>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-#include <linux/latency.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/notifier.h>
-#include <linux/jiffies.h>
-#include <asm/atomic.h>
-
-struct latency_info {
- struct list_head list;
- int usecs;
- char *identifier;
-};
-
-/*
- * locking rule: all modifications to current_max_latency and
- * latency_list need to be done while holding the latency_lock.
- * latency_lock needs to be taken _irqsave.
- */
-static atomic_t current_max_latency;
-static DEFINE_SPINLOCK(latency_lock);
-
-static LIST_HEAD(latency_list);
-static BLOCKING_NOTIFIER_HEAD(latency_notifier);
-
-/*
- * This function returns the maximum latency allowed, which
- * happens to be the minimum of all maximum latencies on the
- * list.
- */
-static int __find_max_latency(void)
-{
- int min = INFINITE_LATENCY;
- struct latency_info *info;
-
- list_for_each_entry(info, &latency_list, list) {
- if (info->usecs < min)
- min = info->usecs;
- }
- return min;
-}
-
-/**
- * set_acceptable_latency - sets the maximum latency acceptable
- * @identifier: string that identifies this driver
- * @usecs: maximum acceptable latency for this driver
- *
- * This function informs the kernel that this device(driver)
- * can accept at most usecs latency. This setting is used for
- * power management and similar tradeoffs.
- *
- * This function sleeps and can only be called from process
- * context.
- * Calling this function with an existing identifier is valid
- * and will cause the existing latency setting to be changed.
- */
-void set_acceptable_latency(char *identifier, int usecs)
-{
- struct latency_info *info, *iter;
- unsigned long flags;
- int found_old = 0;
-
- info = kzalloc(sizeof(struct latency_info), GFP_KERNEL);
- if (!info)
- return;
- info->usecs = usecs;
- info->identifier = kstrdup(identifier, GFP_KERNEL);
- if (!info->identifier)
- goto free_info;
-
- spin_lock_irqsave(&latency_lock, flags);
- list_for_each_entry(iter, &latency_list, list) {
- if (strcmp(iter->identifier, identifier)==0) {
- found_old = 1;
- iter->usecs = usecs;
- break;
- }
- }
- if (!found_old)
- list_add(&info->list, &latency_list);
-
- if (usecs < atomic_read(¤t_max_latency))
- atomic_set(¤t_max_latency, usecs);
-
- spin_unlock_irqrestore(&latency_lock, flags);
-
- blocking_notifier_call_chain(&latency_notifier,
- atomic_read(¤t_max_latency), NULL);
-
- /*
- * if we inserted the new one, we're done; otherwise there was
- * an existing one so we need to free the redundant data
- */
- if (!found_old)
- return;
-
- kfree(info->identifier);
-free_info:
- kfree(info);
-}
-EXPORT_SYMBOL_GPL(set_acceptable_latency);
-
-/**
- * modify_acceptable_latency - changes the maximum latency acceptable
- * @identifier: string that identifies this driver
- * @usecs: maximum acceptable latency for this driver
- *
- * This function informs the kernel that this device(driver)
- * can accept at most usecs latency. This setting is used for
- * power management and similar tradeoffs.
- *
- * This function does not sleep and can be called in any context.
- * Trying to use a non-existing identifier silently gets ignored.
- *
- * Due to the atomic nature of this function, the modified latency
- * value will only be used for future decisions; past decisions
- * can still lead to longer latencies in the near future.
- */
-void modify_acceptable_latency(char *identifier, int usecs)
-{
- struct latency_info *iter;
- unsigned long flags;
-
- spin_lock_irqsave(&latency_lock, flags);
- list_for_each_entry(iter, &latency_list, list) {
- if (strcmp(iter->identifier, identifier) == 0) {
- iter->usecs = usecs;
- break;
- }
- }
- if (usecs < atomic_read(¤t_max_latency))
- atomic_set(¤t_max_latency, usecs);
- spin_unlock_irqrestore(&latency_lock, flags);
-}
-EXPORT_SYMBOL_GPL(modify_acceptable_latency);
-
-/**
- * remove_acceptable_latency - removes the maximum latency acceptable
- * @identifier: string that identifies this driver
- *
- * This function removes a previously set maximum latency setting
- * for the driver and frees up any resources associated with the
- * bookkeeping needed for this.
- *
- * This function does not sleep and can be called in any context.
- * Trying to use a non-existing identifier silently gets ignored.
- */
-void remove_acceptable_latency(char *identifier)
-{
- unsigned long flags;
- int newmax = 0;
- struct latency_info *iter, *temp;
-
- spin_lock_irqsave(&latency_lock, flags);
-
- list_for_each_entry_safe(iter, temp, &latency_list, list) {
- if (strcmp(iter->identifier, identifier) == 0) {
- list_del(&iter->list);
- newmax = iter->usecs;
- kfree(iter->identifier);
- kfree(iter);
- break;
- }
- }
-
- /* If we just deleted the system wide value, we need to
- * recalculate with a full search
- */
- if (newmax == atomic_read(¤t_max_latency)) {
- newmax = __find_max_latency();
- atomic_set(¤t_max_latency, newmax);
- }
- spin_unlock_irqrestore(&latency_lock, flags);
-}
-EXPORT_SYMBOL_GPL(remove_acceptable_latency);
-
-/**
- * system_latency_constraint - queries the system wide latency maximum
- *
- * This function returns the system wide maximum latency in
- * microseconds.
- *
- * This function does not sleep and can be called in any context.
- */
-int system_latency_constraint(void)
-{
- return atomic_read(¤t_max_latency);
-}
-EXPORT_SYMBOL_GPL(system_latency_constraint);
-
-/**
- * synchronize_acceptable_latency - recalculates all latency decisions
- *
- * This function will cause a callback to various kernel pieces that
- * will make those pieces rethink their latency decisions. This implies
- * that if there are overlong latencies in hardware state already, those
- * latencies get taken right now. When this call completes no overlong
- * latency decisions should be active anymore.
- *
- * Typical usecase of this is after a modify_acceptable_latency() call,
- * which in itself is non-blocking and non-synchronizing.
- *
- * This function blocks and should not be called with locks held.
- */
-
-void synchronize_acceptable_latency(void)
-{
- blocking_notifier_call_chain(&latency_notifier,
- atomic_read(¤t_max_latency), NULL);
-}
-EXPORT_SYMBOL_GPL(synchronize_acceptable_latency);
-
-/*
- * Latency notifier: this notifier gets called when a non-atomic new
- * latency value gets set. The expectation nof the caller of the
- * non-atomic set is that when the call returns, future latencies
- * are within bounds, so the functions on the notifier list are
- * expected to take the overlong latencies immediately, inside the
- * callback, and not make a overlong latency decision anymore.
- *
- * The callback gets called when the new latency value is made
- * active so system_latency_constraint() returns the new latency.
- */
-int register_latency_notifier(struct notifier_block * nb)
-{
- return blocking_notifier_chain_register(&latency_notifier, nb);
-}
-EXPORT_SYMBOL_GPL(register_latency_notifier);
-
-int unregister_latency_notifier(struct notifier_block * nb)
-{
- return blocking_notifier_chain_unregister(&latency_notifier, nb);
-}
-EXPORT_SYMBOL_GPL(unregister_latency_notifier);
-
-static __init int latency_init(void)
-{
- atomic_set(¤t_max_latency, INFINITE_LATENCY);
- /*
- * we don't want by default to have longer latencies than 2 ticks,
- * since that would cause lost ticks
- */
- set_acceptable_latency("kernel", 2*1000000/HZ);
- return 0;
-}
-
-module_init(latency_init);
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8-qos/kernel/Makefile linux-2.6.23-rc8-qos-nolatency.c/kernel/Makefile
--- linux-2.6.23-rc8-qos/kernel/Makefile 2007-09-26 14:06:38.000000000 -0700
+++ linux-2.6.23-rc8-qos-nolatency.c/kernel/Makefile 2007-09-26 14:10:15.000000000 -0700
@@ -8,7 +8,7 @@
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
- hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
+ hrtimer.o rwsem.o nsproxy.o srcu.o die_notifier.o \
utsname.o qos_params.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8-qos/sound/core/pcm_native.c linux-2.6.23-rc8-qos-nolatency.c/sound/core/pcm_native.c
--- linux-2.6.23-rc8-qos/sound/core/pcm_native.c 2007-09-26 13:54:57.000000000 -0700
+++ linux-2.6.23-rc8-qos-nolatency.c/sound/core/pcm_native.c 2007-09-26 14:50:57.000000000 -0700
@@ -24,7 +24,7 @@
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/time.h>
-#include <linux/latency.h>
+#include <linux/qos_params.h>
#include <linux/uio.h>
#include <sound/core.h>
#include <sound/control.h>
@@ -447,9 +447,9 @@
snd_pcm_timer_resolution_change(substream);
runtime->status->state = SNDRV_PCM_STATE_SETUP;
- remove_acceptable_latency(substream->latency_id);
+ qos_remove_requirement(QOS_CPU_DMA_LATENCY, substream->latency_id);
if ((usecs = period_to_usecs(runtime)) >= 0)
- set_acceptable_latency(substream->latency_id, usecs);
+ qos_add_requirement(QOS_CPU_DMA_LATENCY, substream->latency_id, usecs);
return 0;
_error:
/* hardware might be unuseable from this time,
@@ -509,7 +509,7 @@
if (substream->ops->hw_free)
result = substream->ops->hw_free(substream);
runtime->status->state = SNDRV_PCM_STATE_OPEN;
- remove_acceptable_latency(substream->latency_id);
+ qos_remove_requirement(QOS_CPU_DMA_LATENCY, substream->latency_id);
return result;
}
The following patch is a bit of a hack to illustrate how the qos
parameter infrastructure can communication information to the e1000
driver to use to set interrupt consolidation policy as a function of
acceptable network latency.
Its just an example.
Signed-off-by: Mark Gross <[email protected]>
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8-qos-nolatency.c/drivers/net/e1000/e1000_main.c linux-2.6.23-rc8-qos-apps/drivers/net/e1000/e1000_main.c
--- linux-2.6.23-rc8-qos-nolatency.c/drivers/net/e1000/e1000_main.c 2007-09-26 13:54:33.000000000 -0700
+++ linux-2.6.23-rc8-qos-apps/drivers/net/e1000/e1000_main.c 2007-09-26 15:00:17.000000000 -0700
@@ -27,6 +27,7 @@
*******************************************************************************/
#include "e1000.h"
+#include <linux/qos_params.h>
#include <net/ip6_checksum.h>
char e1000_driver_name[] = "e1000";
@@ -2764,6 +2765,7 @@
{
unsigned int retval = itr_setting;
struct e1000_hw *hw = &adapter->hw;
+ int requested_latency = qos_requirement(QOS_NETWORK_LATENCY);
if (unlikely(hw->mac_type < e1000_82540))
goto update_itr_done;
@@ -2803,6 +2805,13 @@
break;
}
+ if (requested_latency < 50)
+ retval = lowest_latency;
+ else if (requested_latency < 250)
+ retval = low_latency;
+ else
+ ; //don't change the current algorithm
+
update_itr_done:
return retval;
}
On Wed, 26 Sep 2007 15:40:26 -0700 Mark Gross wrote:
> The following is the qos_param patch that implements a genralization of
> latency.c.
>
Just some general comments (as on irc):
- use 'diffstat -p1 -w70' to summarize each patch
- use checkpatch.pl to check for coding style and other buglets
- has no API docs :(
> +/* assumes qos_lock is held */
> +static void update_target(int i)
I'd prefer a better arg name than 'i'.
> +{
---
~Randy
Phaedrus says that Quality is about caring.
On Wed, Sep 26, 2007 at 04:41:59PM -0700, Randy Dunlap wrote:
> On Wed, 26 Sep 2007 15:40:26 -0700 Mark Gross wrote:
>
> > The following is the qos_param patch that implements a genralization of
> > latency.c.
> >
>
> Just some general comments (as on irc):
>
> - use 'diffstat -p1 -w70' to summarize each patch
> - use checkpatch.pl to check for coding style and other buglets
done
> - has no API docs :(
not done yet.
>
>
>
>
> > +/* assumes qos_lock is held */
> > +static void update_target(int i)
>
> I'd prefer a better arg name than 'i'.
I do too, but i in this case is an Index.
> > +{
>
> ---
> ~Randy
> Phaedrus says that Quality is about caring.
updated patch:
Signed-off-by: mark gross <[email protected]>
Makefile | 2
linux/qos_params.h | 35 +++++
qos_params.c | 354 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 390 insertions(+), 1 deletion(-)
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/include/linux/qos_params.h linux-2.6.23-rc8-qos/include/linux/qos_params.h
--- linux-2.6.23-rc8/include/linux/qos_params.h 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.23-rc8-qos/include/linux/qos_params.h 2007-09-26 14:05:20.000000000 -0700
@@ -0,0 +1,35 @@
+/* interface for the qos_power infrastructure of the linux kernel.
+ *
+ * Mark Gross
+ */
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <linux/miscdevice.h>
+
+struct requirement_list {
+ struct list_head list;
+ union {
+ s32 value;
+ s32 usec;
+ s32 kbps;
+ };
+ char *name;
+};
+
+#define QOS_RESERVED 0
+#define QOS_CPU_DMA_LATENCY 1
+#define QOS_NETWORK_LATENCY 2
+#define QOS_NETWORK_THROUGHPUT 3
+
+#define QOS_NUM_CLASSES 4
+#define QOS_DEFAULT_VALUE -1
+
+int qos_add_requirement(int qos, char *name, s32 value);
+int qos_update_requirement(int qos, char *name, s32 new_value);
+void qos_remove_requirement(int qos, char *name);
+
+int qos_requirement(int qos);
+
+int qos_add_notifier(int qos, struct notifier_block *notifier);
+int qos_remove_notifier(int qos, struct notifier_block *notifier);
+
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/kernel/Makefile linux-2.6.23-rc8-qos/kernel/Makefile
--- linux-2.6.23-rc8/kernel/Makefile 2007-09-26 13:54:54.000000000 -0700
+++ linux-2.6.23-rc8-qos/kernel/Makefile 2007-09-26 14:06:38.000000000 -0700
@@ -9,7 +9,7 @@
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
- utsname.o
+ utsname.o qos_params.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/kernel/qos_params.c linux-2.6.23-rc8-qos/kernel/qos_params.c
--- linux-2.6.23-rc8/kernel/qos_params.c 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.23-rc8-qos/kernel/qos_params.c 2007-09-26 17:09:44.000000000 -0700
@@ -0,0 +1,354 @@
+/*
+ * This module exposes the interface to kernel space for specifying
+ * QoS dependencies. It provides infrastructure for registration of:
+ *
+ * Dependents on a QoS value : register requirements
+ * Watchers of QoS value : get notified when target QoS value changes
+ *
+ * This QoS design is best effort based. Dependents register their QoS needs.
+ * Watchers register to keep track of the current QoS needs of the system.
+ *
+ * There are 3 basic classes of QoS parameter: latency, timeout, throughput
+ * each have defined units:
+ * latency: usec
+ * timeout: usec <-- currently not used.
+ * throughput: kbs (kilo byte / sec)
+ *
+ * There are lists of qos_objects each one wrapping requirements, notifiers
+ *
+ * User mode requirements on a QOS parameter register themselves to the
+ * subsystem by opening the device node /dev/... and writing there request to
+ * the node. As long as the process holds a file handle open to the node the
+ * client continues to be accounted for. Upon file release the usermode
+ * requirement is removed and a new qos target is computed. This way when the
+ * requirement that the application has is cleaned up when closes the file
+ * pointer or exits the qos_object will get an opportunity to clean up.
+ *
+ * mark gross [email protected]
+ */
+
+#include <linux/qos_params.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/string.h>
+#include <linux/platform_device.h>
+#include <linux/init.h>
+
+#include <linux/uaccess.h>
+
+/*
+ * locking rule: all changes to target_value or requirements or notifiers lists
+ * or qos_object list and qos_objects need to happen with qos_lock held, taken
+ * with _irqsave. One lock to rule them all
+ */
+
+struct qos_object {
+ struct requirement_list requirements;
+ struct srcu_notifier_head notifiers;
+ struct miscdevice qos_power_miscdev;
+ char *name;
+ s32 default_value;
+ s32 target_value;
+ s32 (*comparitor)(s32, s32);
+};
+static struct qos_object qos_array[QOS_NUM_CLASSES];
+static DEFINE_SPINLOCK(qos_lock);
+
+static ssize_t qos_power_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *f_pos);
+static int qos_power_open(struct inode *inode, struct file *filp);
+static int qos_power_release(struct inode *inode, struct file *filp);
+
+static const struct file_operations qos_power_fops = {
+ .write = qos_power_write,
+ .open = qos_power_open,
+ .release = qos_power_release,
+};
+
+/* static helper functions */
+static s32 max_compare(s32 v1, s32 v2)
+{
+ if (v1 < v2)
+ return v2;
+ else
+ return v1;
+}
+
+static s32 min_compare(s32 v1, s32 v2)
+{
+ if (v1 < v2)
+ return v1;
+ else
+ return v2;
+}
+
+/* assumes qos_lock is held */
+static void update_target(int i)
+{
+ s32 extreme_value;
+ struct requirement_list *node;
+
+ extreme_value = qos_array[i].default_value;
+ list_for_each_entry(node, &qos_array[i].requirements.list, list) {
+ extreme_value = qos_array[i].comparitor(
+ extreme_value, node->value);
+ }
+ if (qos_array[i].target_value != extreme_value) {
+ qos_array[i].target_value = extreme_value;
+ printk(KERN_ERR "new target for qos %d is %d\n", i,
+ qos_array[i].target_value);
+ srcu_notifier_call_chain(&qos_array[i].notifiers,
+ (unsigned long) qos_array[i].target_value, NULL);
+ }
+}
+
+static int register_new_qos_misc(struct qos_object *qos)
+{
+ int ret;
+
+ qos->qos_power_miscdev.minor = MISC_DYNAMIC_MINOR;
+ qos->qos_power_miscdev.name = qos->name;
+ qos->qos_power_miscdev.fops = &qos_power_fops;
+
+ ret = misc_register(& qos->qos_power_miscdev);
+ if (ret < 0)
+ printk(KERN_ERR "qos_power: misc_register returns %d.\n", ret);
+
+ return ret;
+}
+
+
+/* constructors */
+static void init_qos_object(int i, const char *name, s32 default_value,
+ s32 (*comparitor)(s32, s32))
+{
+ struct qos_object *qos = NULL;
+
+ if (i < QOS_NUM_CLASSES) {
+ qos = &qos_array[i];
+ qos->name = kstrdup(name, GFP_KERNEL);
+ if (!qos->name)
+ goto cleanup;
+
+ qos->default_value = default_value;
+ qos->target_value = default_value;
+ qos->comparitor = comparitor;
+ srcu_init_notifier_head(&qos->notifiers);
+ INIT_LIST_HEAD(&qos->requirements.list);
+ if (register_new_qos_misc(qos) < 0)
+ goto cleanup;
+ }
+ return;
+cleanup:
+ kfree(qos->name);
+}
+
+static int find_qos_object_by_minor(int minor)
+{
+ int i;
+
+ for (i = 0; i < QOS_NUM_CLASSES; i++) {
+ if (minor == qos_array[i].qos_power_miscdev.minor)
+ return i;
+ }
+ return -1;
+}
+
+static void new_latency_qos(int i, const char *name)
+{
+ init_qos_object(i, name, 2000 * USEC_PER_SEC, min_compare);
+ /* 2000 sec is about infinite */
+}
+
+static void new_throughput_qos(int i, const char *name)
+{
+ init_qos_object(i, name, 0, max_compare);
+}
+
+/* accessors: */
+
+int qos_requirement(int i)
+{
+ int ret_val;
+ unsigned long flags;
+ spin_lock_irqsave(&qos_lock, flags);
+
+ ret_val = qos_array[i].target_value;
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return ret_val;
+}
+EXPORT_SYMBOL_GPL(qos_requirement);
+
+
+int qos_add_requirement(int i, char *name, s32 value)
+{
+ struct requirement_list *dep;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL);
+ if (dep) {
+ if (value == QOS_DEFAULT_VALUE)
+ dep->value = qos_array[i].default_value;
+ else
+ dep->value = value;
+ dep->name = kstrdup(name, GFP_KERNEL);
+ if (!dep->name)
+ goto cleanup;
+
+ list_add(&dep->list, &qos_array[i].requirements.list);
+ update_target(i);
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return 0;
+ }
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+cleanup:
+ kfree(dep);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(qos_add_requirement);
+
+
+int qos_update_requirement(int i, char *name, s32 new_value)
+{
+ unsigned long flags;
+ struct requirement_list *node;
+ int pending_update = 0;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ list_for_each_entry(node, &qos_array[i].requirements.list, list) {
+ if (strcmp(node->name, name) == 0) {
+ if (new_value == QOS_DEFAULT_VALUE)
+ node->value = qos_array[i].default_value;
+ else
+ node->value = new_value;
+ pending_update = 1;
+ break;
+ }
+ }
+ if (pending_update)
+ update_target(i);
+
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(qos_update_requirement);
+
+void qos_remove_requirement(int i, char *name)
+{
+ unsigned long flags;
+ struct requirement_list *node;
+ int pending_update = 0;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ list_for_each_entry(node, &qos_array[i].requirements.list, list) {
+ if (strcmp(node->name, name) == 0) {
+ kfree(node->name);
+ list_del(&node->list);
+ kfree(node);
+ pending_update = 1;
+ break;
+ }
+ }
+ if (pending_update)
+ update_target(i);
+ spin_unlock_irqrestore(&qos_lock, flags);
+}
+EXPORT_SYMBOL_GPL(qos_remove_requirement);
+
+int qos_add_notifier(int i, struct notifier_block *notifier)
+{
+ unsigned long flags;
+ int retval;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ retval = srcu_notifier_chain_register(&qos_array[i].notifiers,
+ notifier);
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(qos_add_notifier);
+
+int qos_remove_notifier(int i, struct notifier_block *notifier)
+{
+ unsigned long flags;
+ int retval;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ retval = srcu_notifier_chain_unregister(&qos_array[i].notifiers,
+ notifier);
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(qos_remove_notifier);
+
+#define PID_NAME_LEN sizeof("process_1234567890")
+static char name[PID_NAME_LEN];
+
+static int qos_power_open(struct inode *inode, struct file *filp)
+{
+ int ret;
+ int i;
+
+ i = find_qos_object_by_minor(iminor(inode));
+ if (i >= 0) {
+ filp->private_data = (void *) i;
+ sprintf(name, "process_%d", current->pid);
+ ret = qos_add_requirement(i, name, QOS_DEFAULT_VALUE);
+ if (ret >= 0)
+ return 0;
+ }
+
+ return -EPERM;
+}
+
+static int qos_power_release(struct inode *inode, struct file *filp)
+{
+ int qos;
+
+ qos = (int) filp->private_data;
+ sprintf(name, "process_%d", current->pid);
+ qos_remove_requirement(qos, name);
+
+ return 0;
+}
+
+static ssize_t qos_power_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ s32 value;
+ int qos;
+
+ qos = (int) filp->private_data;
+ if (count != sizeof(s32))
+ return -EINVAL;
+ if (copy_from_user(&value, buf, sizeof(s32)))
+ return -EFAULT;
+ sprintf(name, "process_%d", current->pid);
+ qos_update_requirement(qos, name, value);
+
+ return sizeof(s32);
+}
+
+
+static int __init qos_power_init(void)
+{
+ new_latency_qos(QOS_CPU_DMA_LATENCY, "cpu_dma_latency");
+ new_latency_qos(QOS_NETWORK_LATENCY, "network_latency");
+ new_throughput_qos(QOS_NETWORK_THROUGHPUT, "network_throughput");
+
+ return 0;
+}
+
+late_initcall(qos_power_init);
On Wed, Sep 26, 2007 at 03:40:26PM -0700, Mark Gross wrote:
> + struct list_head list;
> + union {
> + s32 value;
> + s32 usec;
> + s32 kbps;
> + };
> + char *name;
Your } is in a strange place. It looks like it wants to join its friends
closer to the left margin ;-)
> +};
> +
> +#define QOS_RESERVED 0
> +#define QOS_CPU_DMA_LATENCY 1
> +#define QOS_NETWORK_LATENCY 2
> +#define QOS_NETWORK_THROUGHPUT 3
> +
> +#define QOS_NUM_CLASSES 4
> +#define QOS_DEFAULT_VALUE -1
an enum would be better for this, especially as people are likely to add
new types, having to update QOS_NUM_CLASSES each time sucks.
> +/* static helper functions */
> +static s32 max_compare(s32 v1, s32 v2)
> +{
> + if (v1 < v2)
> + return v2;
> + else
> + return v1;
> +}
> +
> +static s32 min_compare(s32 v1, s32 v2)
> +{
> + if (v1 < v2)
> + return v1;
> + else
> + return v2;
> +}
> +
min()/max() instead?
> +/* assumes qos_lock is held */
> +static void update_target(int i)
> +{
'target' might be a more meaningful variable name.
> + qos->qos_power_miscdev.fops = &qos_power_fops;
> +
> + ret = misc_register(& qos->qos_power_miscdev);
> + if (ret < 0 )
> + printk(KERN_ERR "qos_power: misc_register returns %d.\n", ret);
> +
> + return ret;
> +}
> +
Just return misc_register(...); ?
> + if( i < QOS_NUM_CLASSES) {
> + qos = &qos_array[i];
> + qos->name = kstrdup(name, GFP_KERNEL);
> + if(!qos->name)
> + goto cleanup;
> +
> + qos->default_value = default_value;
> + qos->target_value = default_value;
> + qos->comparitor = comparitor;
> + srcu_init_notifier_head(&qos->notifiers);
> + INIT_LIST_HEAD(&qos->requirements.list);
> + if (register_new_qos_misc(qos) < 0 )
> + goto cleanup;
> + }
> + return;
> +cleanup:
> + kfree(qos->name);
> +}
This leaks. You'll have to scan down from i and clean up the kstrdup()
per qos_array element. Presently this will only free the first one to fail
registration.
> +
> +int qos_add_requirement(int i, char *name, s32 value)
> +{
> + struct requirement_list * dep;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&qos_lock, flags);
> + dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL);
kmalloc() under a spinlock. GFP_KERNEL implies __GFP_WAIT, which can
sleep. Slab debugging would have caught this, too.
> + if (dep) {
> + if (value == QOS_DEFAULT_VALUE)
> + dep->value = qos_array[i].default_value;
> + else
> + dep->value = value;
> + dep->name = kstrdup(name, GFP_KERNEL);
And here also, still under the spinlock. You can probably rework the
locking just to protect the list, if you really need it at all, it
doesn't seem to matter anywhere else here.
> + if(!dep->name)
> + goto cleanup;
> +
> + list_add(&dep->list, &qos_array[i].requirements.list);
> + update_target(i);
> + spin_unlock_irqrestore(&qos_lock, flags);
> +
> + return 0;
> + }
> + spin_unlock_irqrestore(&qos_lock, flags);
> +
> +cleanup:
> + if(dep)
> + kfree(dep);
no if() needed.
> + return -ENOMEM;
> +}
> +EXPORT_SYMBOL_GPL(qos_add_requirement);
You also didn't spin_unlock_irqrestore() in the error path, so this bails
out with the lock held and IRQs disabled.
On Wed, 26 Sep 2007 17:40:20 PDT, Mark Gross said:
(others here are probably better at spotting leaks and races than I am,
so I'm skipping those and picking other nits. ;)
> --- linux-2.6.23-rc8/kernel/Makefile 2007-09-26 13:54:54.000000000 -0700
> +++ linux-2.6.23-rc8-qos/kernel/Makefile 2007-09-26 14:06:38.000000000 -
0700
> @@ -9,7 +9,7 @@
> rcupdate.o extable.o params.o posix-timers.o \
> kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
> hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
> - utsname.o
> + utsname.o qos_params.o
So I don't get a choice in the matter if I will be dragging this thing
around in my kernel, even if I have no intention of using the functionality?
> + * This QoS design is best effort based. Dependents register their QoS needs.
> + * Watchers register to keep track of the current QoS needs of the system.
> + *
> + * There are 3 basic classes of QoS parameter: latency, timeout, throughput
> + * each have defined units:
> + * latency: usec
> + * timeout: usec <-- currently not used.
> + * throughput: kbs (kilo byte / sec)
It's unclear whether these are registering a differing QoS request for each
process/container/whatever that asks for one, or if they're global across the
system. Also, even though it's "best effort", it would be good to document
what the failure mode is if we get conflicting requests, or an overcommit
situation - do new requests get refused, or allowed and ignored, or allowed
and only sometimes fulfilled. For instance, assume a gigabit ethernet,
and 3 processes ask for 400 mbits/sec each - who wins, who gets part of what
they asked for, and who loses and gets starved?
> + * User mode requirements on a QOS parameter register themselves to the
> + * subsystem by opening the device node /dev/... and writing there request to
> + * the node. As long as the process holds a file handle open to the node the
/dev? What /dev entry do you use for a network interface? Should this
be a configfs creature instead, or maybe something else?
> +/* static helper functions */
> +static s32 max_compare(s32 v1, s32 v2)
Blech. Is it time for the yearly stamp-out-reinvention-of-max() already?
The use of pointer functions is interesting, but I have to wonder if there's
not a better way...
> +#define PID_NAME_LEN sizeof("process_1234567890")
> +static char name[PID_NAME_LEN];
And then you just pass a pointer to this and kstrdup() it. Why not kmalloc()
the space initially and just 'dep->name = name;' and be done with it?
General nit - why qos_power_*, when none of the supported QoS parameters
seem to be power-related?
On Wed, Sep 26, 2007 at 10:53:03PM -0400, [email protected] wrote:
> On Wed, 26 Sep 2007 17:40:20 PDT, Mark Gross said:
> > --- linux-2.6.23-rc8/kernel/Makefile 2007-09-26 13:54:54.000000000 -0700
> > +++ linux-2.6.23-rc8-qos/kernel/Makefile 2007-09-26 14:06:38.000000000 -
> 0700
> > @@ -9,7 +9,7 @@
> > rcupdate.o extable.o params.o posix-timers.o \
> > kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
> > hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
> > - utsname.o
> > + utsname.o qos_params.o
>
> So I don't get a choice in the matter if I will be dragging this thing
> around in my kernel, even if I have no intention of using the functionality?
>
You don't get that option with latency.c either at the moment, and it's
arguable whether it's even worth it. The more curious thing is that while
this qos params seems to be an evolution of Arjan's latency.c (and the
drivers that are using it are updated in the rest of the patch set),
latency.c itself is still compiled in. Is this an oversight, or was it
intentional? One set of latency hinting APIs only, please :-)
On Thu, 27 Sep 2007 11:24:40 +0900 Paul Mundt wrote:
> > +/* static helper functions */
> > +static s32 max_compare(s32 v1, s32 v2)
> > +{
> > + if (v1 < v2)
> > + return v2;
> > + else
> > + return v1;
> > +}
> > +
> > +static s32 min_compare(s32 v1, s32 v2)
> > +{
> > + if (v1 < v2)
> > + return v1;
> > + else
> > + return v2;
> > +}
> > +
> min()/max() instead?
Other code wants function pointers to the min & max functions.
That's why they are here AFAICT.
> > +/* assumes qos_lock is held */
> > +static void update_target(int i)
> > +{
> 'target' might be a more meaningful variable name.
Anything but 'i'.
---
~Randy
Phaedrus says that Quality is about caring.
Mark Gross wrote:
> On Wed, Sep 26, 2007 at 04:41:59PM -0700, Randy Dunlap wrote:
>> On Wed, 26 Sep 2007 15:40:26 -0700 Mark Gross wrote:
>>
>>> The following is the qos_param patch that implements a genralization of
>>> latency.c.
>>>
>> Just some general comments (as on irc):
>>
>> - use 'diffstat -p1 -w70' to summarize each patch
>> - use checkpatch.pl to check for coding style and other buglets
>
> done
>
>> - has no API docs :(
> not done yet.
>>
>>
>>
>>> +/* assumes qos_lock is held */
>>> +static void update_target(int i)
>> I'd prefer a better arg name than 'i'.
>
> I do too, but i in this case is an Index.
I think in many cases you could use a pointer to qos_array[i] instead of
passing this index 'i' as a function argument.
Roel
On Thu, Sep 27, 2007 at 12:18:21PM +0900, Paul Mundt wrote:
> On Wed, Sep 26, 2007 at 10:53:03PM -0400, [email protected] wrote:
> > On Wed, 26 Sep 2007 17:40:20 PDT, Mark Gross said:
> > > --- linux-2.6.23-rc8/kernel/Makefile 2007-09-26 13:54:54.000000000 -0700
> > > +++ linux-2.6.23-rc8-qos/kernel/Makefile 2007-09-26 14:06:38.000000000 -
> > 0700
> > > @@ -9,7 +9,7 @@
> > > rcupdate.o extable.o params.o posix-timers.o \
> > > kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
> > > hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
> > > - utsname.o
> > > + utsname.o qos_params.o
> >
> > So I don't get a choice in the matter if I will be dragging this thing
> > around in my kernel, even if I have no intention of using the functionality?
> >
> You don't get that option with latency.c either at the moment, and it's
> arguable whether it's even worth it. The more curious thing is that while
> this qos params seems to be an evolution of Arjan's latency.c (and the
> drivers that are using it are updated in the rest of the patch set),
> latency.c itself is still compiled in. Is this an oversight, or was it
> intentional? One set of latency hinting APIs only, please :-)
It was intentional to compile latnecy.c in (and hence qos_parms is
starting off doing the same thing)
I agree, "there can be only be one". (couldn't resist)
I split the patch set up this first patch puts in the qos_param.c and
the second patch yanks latency.c replacing things with its interfaces.
--mgross
On Wed, Sep 26, 2007 at 10:53:03PM -0400, [email protected] wrote:
> On Wed, 26 Sep 2007 17:40:20 PDT, Mark Gross said:
> (others here are probably better at spotting leaks and races than I am,
> so I'm skipping those and picking other nits. ;)
>
> > --- linux-2.6.23-rc8/kernel/Makefile 2007-09-26 13:54:54.000000000 -0700
> > +++ linux-2.6.23-rc8-qos/kernel/Makefile 2007-09-26 14:06:38.000000000 -
> 0700
> > @@ -9,7 +9,7 @@
> > rcupdate.o extable.o params.o posix-timers.o \
> > kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
> > hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
> > - utsname.o
> > + utsname.o qos_params.o
>
> So I don't get a choice in the matter if I will be dragging this thing
> around in my kernel, even if I have no intention of using the functionality?
nope.
>
> > + * This QoS design is best effort based. Dependents register their QoS needs.
> > + * Watchers register to keep track of the current QoS needs of the system.
> > + *
> > + * There are 3 basic classes of QoS parameter: latency, timeout, throughput
> > + * each have defined units:
> > + * latency: usec
> > + * timeout: usec <-- currently not used.
> > + * throughput: kbs (kilo byte / sec)
>
> It's unclear whether these are registering a differing QoS request for each
> process/container/whatever that asks for one, or if they're global across the
> system. Also, even though it's "best effort", it would be good to document
> what the failure mode is if we get conflicting requests, or an overcommit
> situation - do new requests get refused, or allowed and ignored, or allowed
> and only sometimes fulfilled. For instance, assume a gigabit ethernet,
> and 3 processes ask for 400 mbits/sec each - who wins, who gets part of what
> they asked for, and who loses and gets starved?
There are no overcommit failure modes. These QoS parameters are for
constraining aggressive power management throttling from breaking things
thereby enabling better power management solutions.
The QoS values are system wide parameters, not per process or driver.
So the code takes the max or min depending on what class of parameter is
in question. So if you have multiple applications asking for network
throughput of 400Mbs the current code will let the nic driver find out
that it shouldn't throttle itself to the point where it can provide
400Mbs to any one.
You raise a perspective I overlooked. For each of the parameter classes
I had been thinking of computing some extrema out of the set of
requests. However; for throughput it may make more sense to sum up a
collative value.
I'm not sure which way would make more sense.
--mgross
>
> > + * User mode requirements on a QOS parameter register themselves to the
> > + * subsystem by opening the device node /dev/... and writing there request to
> > + * the node. As long as the process holds a file handle open to the node the
>
> /dev? What /dev entry do you use for a network interface? Should this
> be a configfs creature instead, or maybe something else?
>
> > +/* static helper functions */
> > +static s32 max_compare(s32 v1, s32 v2)
>
> Blech. Is it time for the yearly stamp-out-reinvention-of-max() already?
> The use of pointer functions is interesting, but I have to wonder if there's
> not a better way...
>
> > +#define PID_NAME_LEN sizeof("process_1234567890")
> > +static char name[PID_NAME_LEN];
>
> And then you just pass a pointer to this and kstrdup() it. Why not kmalloc()
> the space initially and just 'dep->name = name;' and be done with it?
>
> General nit - why qos_power_*, when none of the supported QoS parameters
> seem to be power-related?
On Wed, Sep 26, 2007 at 09:05:15PM -0700, Randy Dunlap wrote:
> On Thu, 27 Sep 2007 11:24:40 +0900 Paul Mundt wrote:
>
> > > +/* static helper functions */
> > > +static s32 max_compare(s32 v1, s32 v2)
> > > +{
> > > + if (v1 < v2)
> > > + return v2;
> > > + else
> > > + return v1;
> > > +}
> > > +
> > > +static s32 min_compare(s32 v1, s32 v2)
> > > +{
> > > + if (v1 < v2)
> > > + return v1;
> > > + else
> > > + return v2;
> > > +}
> > > +
> > min()/max() instead?
>
> Other code wants function pointers to the min & max functions.
> That's why they are here AFAICT.
>
>
> > > +/* assumes qos_lock is held */
> > > +static void update_target(int i)
> > > +{
> > 'target' might be a more meaningful variable name.
>
> Anything but 'i'.
The 'i' is gone. Will post new patch later today.
--mgross
On Thu, Sep 27, 2007 at 11:24:40AM +0900, Paul Mundt wrote:
> On Wed, Sep 26, 2007 at 03:40:26PM -0700, Mark Gross wrote:
> > + struct list_head list;
> > + union {
> > + s32 value;
> > + s32 usec;
> > + s32 kbps;
> > + };
> > + char *name;
>
> Your } is in a strange place. It looks like it wants to join its friends
> closer to the left margin ;-)
fixed.
>
> > +};
> > +
> > +#define QOS_RESERVED 0
> > +#define QOS_CPU_DMA_LATENCY 1
> > +#define QOS_NETWORK_LATENCY 2
> > +#define QOS_NETWORK_THROUGHPUT 3
> > +
> > +#define QOS_NUM_CLASSES 4
> > +#define QOS_DEFAULT_VALUE -1
>
> an enum would be better for this, especially as people are likely to add
> new types, having to update QOS_NUM_CLASSES each time sucks.
I'm partial to the simplicity of not using an enum in this case. We do
want it to be somewhat non-trivial to add qos classes.
>
> > +/* static helper functions */
> > +static s32 max_compare(s32 v1, s32 v2)
> > +{
> > + if (v1 < v2)
> > + return v2;
> > + else
> > + return v1;
> > +}
> > +
> > +static s32 min_compare(s32 v1, s32 v2)
> > +{
> > + if (v1 < v2)
> > + return v1;
> > + else
> > + return v2;
> > +}
> > +
> min()/max() instead?
done.
>
> > +/* assumes qos_lock is held */
> > +static void update_target(int i)
> > +{
> 'target' might be a more meaningful variable name.
done.
>
> > + qos->qos_power_miscdev.fops = &qos_power_fops;
> > +
> > + ret = misc_register(& qos->qos_power_miscdev);
> > + if (ret < 0 )
> > + printk(KERN_ERR "qos_power: misc_register returns %d.\n", ret);
> > +
> > + return ret;
> > +}
> > +
> Just return misc_register(...); ?
ok.
>
> > + if( i < QOS_NUM_CLASSES) {
> > + qos = &qos_array[i];
> > + qos->name = kstrdup(name, GFP_KERNEL);
> > + if(!qos->name)
> > + goto cleanup;
> > +
> > + qos->default_value = default_value;
> > + qos->target_value = default_value;
> > + qos->comparitor = comparitor;
> > + srcu_init_notifier_head(&qos->notifiers);
> > + INIT_LIST_HEAD(&qos->requirements.list);
> > + if (register_new_qos_misc(qos) < 0 )
> > + goto cleanup;
> > + }
> > + return;
> > +cleanup:
> > + kfree(qos->name);
> > +}
>
> This leaks. You'll have to scan down from i and clean up the kstrdup()
> per qos_array element. Presently this will only free the first one to fail
> registration.
Not seeing the leak here. this is an if block not a for loop.
>
> > +
> > +int qos_add_requirement(int i, char *name, s32 value)
> > +{
> > + struct requirement_list * dep;
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&qos_lock, flags);
> > + dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL);
>
> kmalloc() under a spinlock. GFP_KERNEL implies __GFP_WAIT, which can
> sleep. Slab debugging would have caught this, too.
>
/me turns on slab debugging.
> > + if (dep) {
> > + if (value == QOS_DEFAULT_VALUE)
> > + dep->value = qos_array[i].default_value;
> > + else
> > + dep->value = value;
> > + dep->name = kstrdup(name, GFP_KERNEL);
>
> And here also, still under the spinlock. You can probably rework the
> locking just to protect the list, if you really need it at all, it
> doesn't seem to matter anywhere else here.
>
Re-worked. I didn't need the lock to be held as long as I had it.
Thanks for catching this!
> > + if(!dep->name)
> > + goto cleanup;
> > +
> > + list_add(&dep->list, &qos_array[i].requirements.list);
> > + update_target(i);
> > + spin_unlock_irqrestore(&qos_lock, flags);
> > +
> > + return 0;
> > + }
> > + spin_unlock_irqrestore(&qos_lock, flags);
> > +
> > +cleanup:
> > + if(dep)
> > + kfree(dep);
>
> no if() needed.
fixed.
>
> > + return -ENOMEM;
> > +}
> > +EXPORT_SYMBOL_GPL(qos_add_requirement);
>
> You also didn't spin_unlock_irqrestore() in the error path, so this bails
> out with the lock held and IRQs disabled.
fixed by the re-work.
--mgross
On Wed, Sep 26, 2007 at 09:05:15PM -0700, Randy Dunlap wrote:
> On Thu, 27 Sep 2007 11:24:40 +0900 Paul Mundt wrote:
>
> > > +/* static helper functions */
> > > +static s32 max_compare(s32 v1, s32 v2)
> > > +{
> > > + if (v1 < v2)
> > > + return v2;
> > > + else
> > > + return v1;
> > > +}
> > > +
> > > +static s32 min_compare(s32 v1, s32 v2)
> > > +{
> > > + if (v1 < v2)
> > > + return v1;
> > > + else
> > > + return v2;
> > > +}
> > > +
> > min()/max() instead?
>
> Other code wants function pointers to the min & max functions.
> That's why they are here AFAICT.
>
>
> > > +/* assumes qos_lock is held */
> > > +static void update_target(int i)
> > > +{
> > 'target' might be a more meaningful variable name.
>
> Anything but 'i'.
>
> ---
Updated qos PM parameter patch:
Note: the replacing of latency.c with this is a separate patch.
this patch attempts to address the issues raised so far.
--mgross
Signed-off-by: mark gross <[email protected]>
mgross@mgross-t43:~/workstuff/power_qos$ diffstat -p1 qos_params_sept_27.patch
include/linux/qos_params.h | 35 +++
kernel/Makefile | 2
kernel/qos_params.c | 413 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 449 insertions(+), 1 deletion(-)
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/include/linux/qos_params.h linux-2.6.23-rc8-qos/include/linux/qos_params.h
--- linux-2.6.23-rc8/include/linux/qos_params.h 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.23-rc8-qos/include/linux/qos_params.h 2007-09-27 08:42:54.000000000 -0700
@@ -0,0 +1,35 @@
+/* interface for the qos_power infrastructure of the linux kernel.
+ *
+ * Mark Gross
+ */
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <linux/miscdevice.h>
+
+struct requirement_list {
+ struct list_head list;
+ union {
+ s32 value;
+ s32 usec;
+ s32 kbps;
+ };
+ char *name;
+};
+
+#define QOS_RESERVED 0
+#define QOS_CPU_DMA_LATENCY 1
+#define QOS_NETWORK_LATENCY 2
+#define QOS_NETWORK_THROUGHPUT 3
+
+#define QOS_NUM_CLASSES 4
+#define QOS_DEFAULT_VALUE -1
+
+int qos_add_requirement(int qos, char *name, s32 value);
+int qos_update_requirement(int qos, char *name, s32 new_value);
+void qos_remove_requirement(int qos, char *name);
+
+int qos_requirement(int qos);
+
+int qos_add_notifier(int qos, struct notifier_block *notifier);
+int qos_remove_notifier(int qos, struct notifier_block *notifier);
+
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/kernel/Makefile linux-2.6.23-rc8-qos/kernel/Makefile
--- linux-2.6.23-rc8/kernel/Makefile 2007-09-26 13:54:54.000000000 -0700
+++ linux-2.6.23-rc8-qos/kernel/Makefile 2007-09-26 14:06:38.000000000 -0700
@@ -9,7 +9,7 @@
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
- utsname.o
+ utsname.o qos_params.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/kernel/qos_params.c linux-2.6.23-rc8-qos/kernel/qos_params.c
--- linux-2.6.23-rc8/kernel/qos_params.c 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.23-rc8-qos/kernel/qos_params.c 2007-09-27 12:46:13.000000000 -0700
@@ -0,0 +1,413 @@
+/*
+ * This module exposes the interface to kernel space for specifying
+ * QoS dependencies. It provides infrastructure for registration of:
+ *
+ * Dependents on a QoS value : register requirements
+ * Watchers of QoS value : get notified when target QoS value changes
+ *
+ * This QoS design is best effort based. Dependents register their QoS needs.
+ * Watchers register to keep track of the current QoS needs of the system.
+ *
+ * There are 3 basic classes of QoS parameter: latency, timeout, throughput
+ * each have defined units:
+ * latency: usec
+ * timeout: usec <-- currently not used.
+ * throughput: kbs (kilo byte / sec)
+ *
+ * There are lists of qos_objects each one wrapping requirements, notifiers
+ *
+ * User mode requirements on a QOS parameter register themselves to the
+ * subsystem by opening the device node /dev/... and writing there request to
+ * the node. As long as the process holds a file handle open to the node the
+ * client continues to be accounted for. Upon file release the usermode
+ * requirement is removed and a new qos target is computed. This way when the
+ * requirement that the application has is cleaned up when closes the file
+ * pointer or exits the qos_object will get an opportunity to clean up.
+ *
+ * mark gross [email protected]
+ */
+
+#include <linux/qos_params.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/string.h>
+#include <linux/platform_device.h>
+#include <linux/init.h>
+
+#include <linux/uaccess.h>
+
+/*
+ * locking rule: all changes to target_value or requirements or notifiers lists
+ * or qos_object list and qos_objects need to happen with qos_lock held, taken
+ * with _irqsave. One lock to rule them all
+ */
+
+struct qos_object {
+ struct requirement_list requirements;
+ struct srcu_notifier_head notifiers;
+ struct miscdevice qos_power_miscdev;
+ char *name;
+ s32 default_value;
+ s32 target_value;
+ s32 (*comparitor)(s32, s32);
+};
+static struct qos_object qos_array[QOS_NUM_CLASSES];
+static DEFINE_SPINLOCK(qos_lock);
+
+static ssize_t qos_power_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *f_pos);
+static int qos_power_open(struct inode *inode, struct file *filp);
+static int qos_power_release(struct inode *inode, struct file *filp);
+
+static const struct file_operations qos_power_fops = {
+ .write = qos_power_write,
+ .open = qos_power_open,
+ .release = qos_power_release,
+};
+
+/* static helper functions */
+static s32 max_compare(s32 v1, s32 v2)
+{
+ return max(v1, v2);
+}
+
+static s32 min_compare(s32 v1, s32 v2)
+{
+ return min(v1, v2);
+}
+
+/* assumes qos_lock is held */
+static void update_target(int target)
+{
+ s32 extreme_value;
+ struct requirement_list *node;
+
+ extreme_value = qos_array[target].default_value;
+ list_for_each_entry(node, &qos_array[target].requirements.list, list) {
+ extreme_value = qos_array[target].comparitor(
+ extreme_value, node->value);
+ }
+ if (qos_array[target].target_value != extreme_value) {
+ qos_array[target].target_value = extreme_value;
+ printk(KERN_ERR "new target for qos %d is %d\n", target,
+ qos_array[target].target_value);
+ srcu_notifier_call_chain(&qos_array[target].notifiers,
+ (unsigned long) qos_array[target].target_value, NULL);
+ }
+}
+
+static int register_new_qos_misc(struct qos_object *qos)
+{
+ int ret;
+
+ qos->qos_power_miscdev.minor = MISC_DYNAMIC_MINOR;
+ qos->qos_power_miscdev.name = qos->name;
+ qos->qos_power_miscdev.fops = &qos_power_fops;
+
+ ret = misc_register(&qos->qos_power_miscdev);
+
+ return ret;
+}
+
+
+/* constructors */
+static int init_qos_object(int qos_class, const char *name, s32 default_value,
+ s32 (*comparitor)(s32, s32))
+{
+ int ret = -ENOMEM;
+ struct qos_object *qos = NULL;
+
+ if (qos_class < QOS_NUM_CLASSES) {
+ qos = &qos_array[qos_class];
+ qos->name = kstrdup(name, GFP_KERNEL);
+ if (!qos->name)
+ goto cleanup;
+
+ qos->default_value = default_value;
+ qos->target_value = default_value;
+ qos->comparitor = comparitor;
+ srcu_init_notifier_head(&qos->notifiers);
+ INIT_LIST_HEAD(&qos->requirements.list);
+ ret = register_new_qos_misc(qos);
+ if (ret < 0)
+ goto cleanup;
+ } else
+ ret = -EINVAL;
+
+ return ret;
+cleanup:
+ kfree(qos->name);
+
+ return ret;
+}
+
+static int find_qos_object_by_minor(int minor)
+{
+ int qos_class;
+
+ for (qos_class = 0; qos_class < QOS_NUM_CLASSES; qos_class++) {
+ if (minor == qos_array[qos_class].qos_power_miscdev.minor)
+ return qos_class;
+ }
+ return -1;
+}
+
+static int new_latency_qos(int qos_class, const char *name)
+{
+ return init_qos_object(qos_class, name, 2000 * USEC_PER_SEC,
+ min_compare);
+ /* 2000 sec is about infinite */
+}
+
+static int new_throughput_qos(int qos_class, const char *name)
+{
+ return init_qos_object(qos_class, name, 0, max_compare);
+}
+
+/**
+ * qos_requirement - returns current system wide qos expectation
+ * @qos_class: identification of which qos value is requested
+ *
+ * This function returns the current target value in an atomic manner.
+ */
+int qos_requirement(int qos_class)
+{
+ int ret_val;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ ret_val = qos_array[qos_class].target_value;
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return ret_val;
+}
+EXPORT_SYMBOL_GPL(qos_requirement);
+
+/**
+ * qos_add_requirement - inserts new qos request into the list
+ * @qos_class: identifies which list of qos request to us
+ * @name: identifies the request
+ * @value: defines the qos request
+ *
+ * This function inserts a new entry in the qos_class list of requested qos
+ * performance charactoistics. It recomputes the agregate QoS expectations for
+ * the qos_class of parrameters.
+ */
+int qos_add_requirement(int qos_class, char *name, s32 value)
+{
+ struct requirement_list *dep;
+ unsigned long flags;
+
+ dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL);
+ if (dep) {
+ if (value == QOS_DEFAULT_VALUE)
+ dep->value = qos_array[qos_class].default_value;
+ else
+ dep->value = value;
+ dep->name = kstrdup(name, GFP_KERNEL);
+ if (!dep->name)
+ goto cleanup;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ list_add(&dep->list, &qos_array[qos_class].requirements.list);
+ update_target(qos_class);
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return 0;
+ }
+
+cleanup:
+ kfree(dep);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(qos_add_requirement);
+
+/**
+ * qos_update_requirement - modifies an existing qos request
+ * @qos_class: identifies which list of qos request to us
+ * @name: identifies the request
+ * @value: defines the qos request
+ *
+ * Updates an existing qos requierement for the qos_class of parameters along
+ * with updating the target qos_class value.
+ *
+ * If the named request isn't in the lest then no change is made.
+ */
+int qos_update_requirement(int qos_class, char *name, s32 new_value)
+{
+ unsigned long flags;
+ struct requirement_list *node;
+ int pending_update = 0;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ list_for_each_entry(node,
+ &qos_array[qos_class].requirements.list, list) {
+ if (strcmp(node->name, name) == 0) {
+ if (new_value == QOS_DEFAULT_VALUE)
+ node->value =
+ qos_array[qos_class].default_value;
+ else
+ node->value = new_value;
+ pending_update = 1;
+ break;
+ }
+ }
+ if (pending_update)
+ update_target(qos_class);
+
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(qos_update_requirement);
+
+/**
+ * qos_remove_requirement - modifies an existing qos request
+ * @qos_class: identifies which list of qos request to us
+ * @name: identifies the request
+ *
+ * Will remove named qos request from qos_class list of parrameters and
+ * recompute the current target value for the qos_class.
+ */
+void qos_remove_requirement(int qos_class, char *name)
+{
+ unsigned long flags;
+ struct requirement_list *node;
+ int pending_update = 0;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ list_for_each_entry(node,
+ &qos_array[qos_class].requirements.list, list) {
+ if (strcmp(node->name, name) == 0) {
+ kfree(node->name);
+ list_del(&node->list);
+ kfree(node);
+ pending_update = 1;
+ break;
+ }
+ }
+ if (pending_update)
+ update_target(qos_class);
+ spin_unlock_irqrestore(&qos_lock, flags);
+}
+EXPORT_SYMBOL_GPL(qos_remove_requirement);
+
+/**
+ * qos_add_notifier - sets notification entry for changes to target value
+ * @qos_class: identifies which qos target changes should be notified.
+ * @notifier: notifier block managed by caller.
+ *
+ * will register the notifier into a notification chain that gets called
+ * uppon changes to the qos_class target value.
+ */
+ int qos_add_notifier(int qos_class, struct notifier_block *notifier)
+{
+ unsigned long flags;
+ int retval;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ retval = srcu_notifier_chain_register(&qos_array[qos_class].notifiers,
+ notifier);
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(qos_add_notifier);
+
+/**
+ * qos_remove_notifier - deletes notification entry from chain.
+ * @qos_class: identifies which qos target changes are notified.
+ * @notifier: notifier block to be removed.
+ *
+ * will remove the notifier from the notification chain that gets called
+ * uppon changes to the qos_class target value.
+ */
+int qos_remove_notifier(int qos_class, struct notifier_block *notifier)
+{
+ unsigned long flags;
+ int retval;
+
+ spin_lock_irqsave(&qos_lock, flags);
+ retval = srcu_notifier_chain_unregister(&qos_array[qos_class].notifiers,
+ notifier);
+ spin_unlock_irqrestore(&qos_lock, flags);
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(qos_remove_notifier);
+
+#define PID_NAME_LEN sizeof("process_1234567890")
+static char name[PID_NAME_LEN];
+
+static int qos_power_open(struct inode *inode, struct file *filp)
+{
+ int ret;
+ int qos_class;
+
+ qos_class = find_qos_object_by_minor(iminor(inode));
+ if (qos_class >= 0) {
+ filp->private_data = (void *) qos_class;
+ sprintf(name, "process_%d", current->pid);
+ ret = qos_add_requirement(qos_class, name, QOS_DEFAULT_VALUE);
+ if (ret >= 0)
+ return 0;
+ }
+
+ return -EPERM;
+}
+
+static int qos_power_release(struct inode *inode, struct file *filp)
+{
+ int qos_class;
+
+ qos_class = (int) filp->private_data;
+ sprintf(name, "process_%d", current->pid);
+ qos_remove_requirement(qos_class, name);
+
+ return 0;
+}
+
+static ssize_t qos_power_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ s32 value;
+ int qos_class;
+
+ qos_class = (int) filp->private_data;
+ if (count != sizeof(s32))
+ return -EINVAL;
+ if (copy_from_user(&value, buf, sizeof(s32)))
+ return -EFAULT;
+ sprintf(name, "process_%d", current->pid);
+ qos_update_requirement(qos_class, name, value);
+
+ return sizeof(s32);
+}
+
+
+static int __init qos_power_init(void)
+{
+ int ret = 0;
+ ret = new_latency_qos(QOS_CPU_DMA_LATENCY, "cpu_dma_latency");
+ if (ret < 0) {
+ printk(KERN_ERR "qos_param: cpu_dma_latency setup failed\n");
+ return ret;
+ }
+ ret = new_latency_qos(QOS_NETWORK_LATENCY, "network_latency");
+ if (ret < 0) {
+ printk(KERN_ERR "qos_param: network_latency setup failed\n");
+ return ret;
+ }
+ ret = new_throughput_qos(QOS_NETWORK_THROUGHPUT, "network_throughput");
+ if (ret < 0)
+ printk(KERN_ERR "qos_param: network_throughput setup failed\n");
+
+ return ret;
+}
+
+late_initcall(qos_power_init);
On Thu, Sep 27, 2007 at 01:17:39PM -0700, Mark Gross wrote:
> Updated qos PM parameter patch:
> Note: the replacing of latency.c with this is a separate patch.
>
> this patch attempts to address the issues raised so far.
>
[snip]
> +static int register_new_qos_misc(struct qos_object *qos)
> +{
> + int ret;
> +
> + qos->qos_power_miscdev.minor = MISC_DYNAMIC_MINOR;
> + qos->qos_power_miscdev.name = qos->name;
> + qos->qos_power_miscdev.fops = &qos_power_fops;
> +
> + ret = misc_register(&qos->qos_power_miscdev);
> +
> + return ret;
> +}
> +
Minor nit, ret is a pointless variable here, you can just return
misc_register directly.
Other than that, this looks much better!
On Wed, 26 Sep 2007 15:40:26 -0700 Mark Gross <[email protected]> wrote:
> +#define QOS_RESERVED 0
> +#define QOS_CPU_DMA_LATENCY 1
> +#define QOS_NETWORK_LATENCY 2
> +#define QOS_NETWORK_THROUGHPUT 3
> +
> +#define QOS_NUM_CLASSES 4
> +#define QOS_DEFAULT_VALUE -1
> +
> +int qos_add_requirement(int qos, char *name, s32 value);
> +int qos_update_requirement(int qos, char *name, s32 new_value);
> +void qos_remove_requirement(int qos, char *name);
It's a bit rude stealing the entire "qos" namespace like this - there are
many different forms of QoS, some already in-kernel.
s/qos/pm_qos/g ?
On Thu, Sep 27, 2007 at 11:25:01PM -0700, Andrew Morton wrote:
> On Wed, 26 Sep 2007 15:40:26 -0700 Mark Gross <[email protected]> wrote:
> > +int qos_add_requirement(int qos, char *name, s32 value);
> > +int qos_update_requirement(int qos, char *name, s32 new_value);
> > +void qos_remove_requirement(int qos, char *name);
>
> It's a bit rude stealing the entire "qos" namespace like this - there are
> many different forms of QoS, some already in-kernel.
>
> s/qos/pm_qos/g ?
lat_qos or something might be more suitable.. it's a latency property, not
a power management one (even if pm ends up being the primary user of it).
On Thu, Sep 27, 2007 at 11:25:01PM -0700, Andrew Morton wrote:
> On Wed, 26 Sep 2007 15:40:26 -0700 Mark Gross <[email protected]> wrote:
>
> > +#define QOS_RESERVED 0
> > +#define QOS_CPU_DMA_LATENCY 1
> > +#define QOS_NETWORK_LATENCY 2
> > +#define QOS_NETWORK_THROUGHPUT 3
> > +
> > +#define QOS_NUM_CLASSES 4
> > +#define QOS_DEFAULT_VALUE -1
> > +
> > +int qos_add_requirement(int qos, char *name, s32 value);
> > +int qos_update_requirement(int qos, char *name, s32 new_value);
> > +void qos_remove_requirement(int qos, char *name);
>
> It's a bit rude stealing the entire "qos" namespace like this - there are
> many different forms of QoS, some already in-kernel.
>
> s/qos/pm_qos/g ?
I suppose it is a bit inconiderate. I could grow to like pm_qos,
performance_throttling_constraint_hint_infrastructure is a bit too
wordy.
I suppose I should use qospm as thats the way it was put up on that
lesswatts.org web page.
Would qospm be good enough?
--mgross
On Fri, Sep 28, 2007 at 03:41:13PM +0900, Paul Mundt wrote:
> On Thu, Sep 27, 2007 at 11:25:01PM -0700, Andrew Morton wrote:
> > On Wed, 26 Sep 2007 15:40:26 -0700 Mark Gross <[email protected]> wrote:
> > > +int qos_add_requirement(int qos, char *name, s32 value);
> > > +int qos_update_requirement(int qos, char *name, s32 new_value);
> > > +void qos_remove_requirement(int qos, char *name);
> >
> > It's a bit rude stealing the entire "qos" namespace like this - there are
> > many different forms of QoS, some already in-kernel.
> >
> > s/qos/pm_qos/g ?
>
> lat_qos or something might be more suitable.. it's a latency property, not
> a power management one (even if pm ends up being the primary user of it).
Its not just latency otherwise I'd agree with you. right now I'm
thinking of changing things to "qospm" unless there folks feel strongly
about pm_qos.
--mgross
On Fri, 28 Sep 2007 10:19:21 -0700 Mark Gross <[email protected]> wrote:
> On Thu, Sep 27, 2007 at 11:25:01PM -0700, Andrew Morton wrote:
> > On Wed, 26 Sep 2007 15:40:26 -0700 Mark Gross <[email protected]> wrote:
> >
> > > +#define QOS_RESERVED 0
> > > +#define QOS_CPU_DMA_LATENCY 1
> > > +#define QOS_NETWORK_LATENCY 2
> > > +#define QOS_NETWORK_THROUGHPUT 3
> > > +
> > > +#define QOS_NUM_CLASSES 4
> > > +#define QOS_DEFAULT_VALUE -1
> > > +
> > > +int qos_add_requirement(int qos, char *name, s32 value);
> > > +int qos_update_requirement(int qos, char *name, s32 new_value);
> > > +void qos_remove_requirement(int qos, char *name);
> >
> > It's a bit rude stealing the entire "qos" namespace like this - there are
> > many different forms of QoS, some already in-kernel.
> >
> > s/qos/pm_qos/g ?
>
> I suppose it is a bit inconiderate. I could grow to like pm_qos,
> performance_throttling_constraint_hint_infrastructure is a bit too
> wordy.
>
> I suppose I should use qospm as thats the way it was put up on that
> lesswatts.org web page.
>
> Would qospm be good enough?
>
Don't think it matters a lot, but kernel naming tends to be big-endian (ie:
we have net_ratelimit, not ratelimit_net), so the major part (pm) would
come first under that scheme.
On Fri, Sep 28, 2007 at 11:51:41AM -0700, Andrew Morton wrote:
> On Fri, 28 Sep 2007 10:19:21 -0700 Mark Gross <[email protected]> wrote:
>
> > On Thu, Sep 27, 2007 at 11:25:01PM -0700, Andrew Morton wrote:
> > > On Wed, 26 Sep 2007 15:40:26 -0700 Mark Gross <[email protected]> wrote:
> > >
> > > > +#define QOS_RESERVED 0
> > > > +#define QOS_CPU_DMA_LATENCY 1
> > > > +#define QOS_NETWORK_LATENCY 2
> > > > +#define QOS_NETWORK_THROUGHPUT 3
> > > > +
> > > > +#define QOS_NUM_CLASSES 4
> > > > +#define QOS_DEFAULT_VALUE -1
> > > > +
> > > > +int qos_add_requirement(int qos, char *name, s32 value);
> > > > +int qos_update_requirement(int qos, char *name, s32 new_value);
> > > > +void qos_remove_requirement(int qos, char *name);
> > >
> > > It's a bit rude stealing the entire "qos" namespace like this - there are
> > > many different forms of QoS, some already in-kernel.
> > >
> > > s/qos/pm_qos/g ?
> >
> > I suppose it is a bit inconiderate. I could grow to like pm_qos,
> > performance_throttling_constraint_hint_infrastructure is a bit too
> > wordy.
> >
> > I suppose I should use qospm as thats the way it was put up on that
> > lesswatts.org web page.
> >
> > Would qospm be good enough?
> >
>
> Don't think it matters a lot, but kernel naming tends to be big-endian (ie:
> we have net_ratelimit, not ratelimit_net), so the major part (pm) would
> come first under that scheme.
this makes sense. pm_qos it is.
--mgross