Following this comment [1], this updates thermal_of to support multiple
sensors.
This series adds support for thermal aggregation.
One use case for it is using the IPA when we have
multiple sensors for one performance domain.
This has been tested on the mt8195 using s-tui.
To test and validate, we heat up the CPU and the heat sink.
At some point, we run benchmark tests with different configurations:
- Mediatek kernel (IPA + their own thermal aggregation)
- Mainline kernel
- Mainline kernel with IPA and aggregation enabled
With the IPA and the aggregation enabled, we get the best performance
with the most stable CPU temperature.
The aggregation is configured and enabled using device tree.
One thermal zone has to be created with a list of sensors.
It will take care of registering a thermal zone for each sensor.
The cooling device will only be registered with the aggregating thermal
zone.
There is still something important missing: a way to check that all
aggregated sensors are part of the same performance domain.
So far, I don't see how this should be done. Some recommendations would be
appreciated.
Changes in v2:
- Rebased on 6.7
- Separated generic multi sensor and dt specific code
- Simplified the code
- Drop min / max and only do weighted average (seems more adequate for IPA)
Changes in v3:
- Rebased on 6.9
- Reworked the way to register a multi sensor thermal zone
- Only one thermal zone to define in device tree
- Max has been re-added
- Enabled it on mt8195
Changes in v4:
- Rebased on latest master (fixed the build issue)
- Dropped the average since I don't have any use case for it
[1]: https://patchwork.kernel.org/comment/24723927/
Alexandre Bailon (4):
dt-bindings: thermal: Restore the thermal-sensors property
thermal: Add support of multi sensors to thermal_core
thermal: Add support of multi sensors to thermal_of
ARM64: mt8195: Use thermal aggregation for big and little cpu
.../bindings/thermal/thermal-zones.yaml | 5 +-
arch/arm64/boot/dts/mediatek/mt8195.dtsi | 212 ++-----------
drivers/thermal/Makefile | 1 +
drivers/thermal/thermal_core.h | 15 +
drivers/thermal/thermal_multi.c | 288 ++++++++++++++++++
drivers/thermal/thermal_of.c | 250 ++++++++++++++-
include/uapi/linux/thermal.h | 5 +
7 files changed, 579 insertions(+), 197 deletions(-)
create mode 100644 drivers/thermal/thermal_multi.c
--
2.44.1
This adds support for multi sensors to thermal.
Currently, this only supports the get_temp operation.
This returns the maximum temperature among all the sensors.
Signed-off-by: Alexandre Bailon <[email protected]>
---
drivers/thermal/Makefile | 1 +
drivers/thermal/thermal_core.h | 15 ++
drivers/thermal/thermal_multi.c | 286 ++++++++++++++++++++++++++++++++
include/uapi/linux/thermal.h | 5 +
4 files changed, 307 insertions(+)
create mode 100644 drivers/thermal/thermal_multi.c
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 5cdf7d68687f..872190f9062b 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -6,6 +6,7 @@ CFLAGS_thermal_core.o := -I$(src)
obj-$(CONFIG_THERMAL) += thermal_sys.o
thermal_sys-y += thermal_core.o thermal_sysfs.o
thermal_sys-y += thermal_trip.o thermal_helpers.o
+# multi sensor thermal zone support
+thermal_sys-y += thermal_multi.o
# netlink interface to manage the thermal framework
thermal_sys-$(CONFIG_THERMAL_NETLINK) += thermal_netlink.o
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 20e7b45673d6..c2cf2c19738a 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -261,6 +261,21 @@ ssize_t weight_show(struct device *, struct device_attribute *, char *);
ssize_t weight_store(struct device *, struct device_attribute *, const char *,
size_t);
+/*
+ * Multi sensors
+ *
+ * Helpers implemented in thermal_multi.c to create a thermal zone whose
+ * temperature is computed from several sensor thermal zones.
+ */
+struct thermal_zone_device *thermal_multi_sensor_find_tz(const char *type);
+struct thermal_zone_device_ops *thermal_multi_sensor_alloc_ops(void);
+struct thermal_zone_device *thermal_multi_sensor_tz_alloc(const char *type,
+ struct thermal_trip *trips,
+ int num_trips,
+ struct thermal_zone_device_ops *ops,
+ int passive_delay, int polling_delay);
+void thermal_multi_sensor_tz_free(struct thermal_zone_device *tz);
+int thermal_multi_sensor_validate_coeff(int *coeff, int count, int offset);
+int thermal_multi_sensor_register(struct thermal_zone_device *tz,
+ struct thermal_zone_device *sensor_tz, int coeff);
+void thermal_multi_sensor_unregister(struct thermal_zone_device *sensor_tz);
+
+
#ifdef CONFIG_THERMAL_STATISTICS
void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
unsigned long new_state);
diff --git a/drivers/thermal/thermal_multi.c b/drivers/thermal/thermal_multi.c
new file mode 100644
index 000000000000..4b3f261a7000
--- /dev/null
+++ b/drivers/thermal/thermal_multi.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+#include <linux/types.h>
+#include <linux/string.h>
+
+#include "thermal_core.h"
+
+/* One sensor attached to a multi sensor thermal zone. */
+struct sensor_interface {
+ /* Thermal zone of the sensor, read with thermal_zone_get_temp() */
+ struct thermal_zone_device *tz;
+ /* Factor the sensor temperature is multiplied by when aggregating */
+ int coeff;
+
+ /* Link in multi_sensor_thermal_zone.sensors */
+ struct list_head node;
+};
+
+/*
+ * Private data of a multi sensor thermal zone; it is passed as devdata when
+ * the zone is registered and read back through tz->devdata.
+ */
+struct multi_sensor_thermal_zone {
+ /* The registered aggregating thermal zone */
+ struct thermal_zone_device *tz;
+ /* Taken while reading, adding to or removing from the sensors list */
+ struct mutex sensors_lock;
+ /* List of struct sensor_interface, linked through their node member */
+ struct list_head sensors;
+
+ /* Link in the global multi_tz_list */
+ struct list_head node;
+};
+
+/* Held while creating a multi sensor zone and while (un)registering sensors */
+static DEFINE_MUTEX(multi_tz_mutex);
+/* All zones created by thermal_multi_sensor_tz_alloc(), linked by their node */
+static LIST_HEAD(multi_tz_list);
+
+/*
+ * Highest sensor temperature assumed by thermal_multi_sensor_validate_coeff()
+ * (presumably 120 degrees Celsius expressed in millidegrees - TODO confirm).
+ */
+#define TJ_MAX 120000
+
+/**
+ * multi_sensor_get_temp_max() - Report the highest temperature of all sensors
+ * @tz: The multi sensor thermal zone
+ * @temp: Where the aggregated temperature is stored on success
+ *
+ * Reads every registered sensor with thermal_zone_get_temp(), multiplies the
+ * reading by the coefficient of the sensor and keeps the highest result.
+ * @temp is only written when all the sensors have been read successfully.
+ *
+ * Returns: 0 on success, -ENODEV if no sensor is registered, or the error
+ * returned by thermal_zone_get_temp() for the first sensor that fails
+ */
+static int multi_sensor_get_temp_max(struct thermal_zone_device *tz, int *temp)
+{
+	struct multi_sensor_thermal_zone *multi_tz = tz->devdata;
+	struct sensor_interface *sensor;
+	bool have_temp = false;
+	int sensor_temp;
+	int max_temp = 0;
+	int ret = -ENODEV;
+
+	mutex_lock(&multi_tz->sensors_lock);
+
+	/* An empty list skips the loop and leaves ret set to -ENODEV */
+	list_for_each_entry(sensor, &multi_tz->sensors, node) {
+		ret = thermal_zone_get_temp(sensor->tz, &sensor_temp);
+		if (ret)
+			goto unlock;
+
+		sensor_temp *= sensor->coeff;
+
+		/*
+		 * Seed the maximum with the first reading instead of comparing
+		 * against an uninitialized value.
+		 */
+		if (!have_temp || sensor_temp > max_temp) {
+			max_temp = sensor_temp;
+			have_temp = true;
+		}
+	}
+
+	if (have_temp)
+		*temp = max_temp;
+
+unlock:
+	mutex_unlock(&multi_tz->sensors_lock);
+	return ret;
+}
+
+/* Tell whether an accumulated temperature is non-negative and fits in an int */
+static bool multi_sensor_temp_fits_int(s64 temp)
+{
+	return temp >= 0 && temp == (int)temp;
+}
+
+/**
+ * thermal_multi_sensor_validate_coeff() - Check coefficients against overflow
+ * @coeff: An array of coefficients
+ * @count: Number of coefficients in @coeff
+ * @offset: The offset added to the aggregated temperature
+ *
+ * Check that the sum of the coefficients, each multiplied by the highest
+ * expected sensor temperature (TJ_MAX), plus @offset stays within the range of
+ * a non-negative int. The sum is computed with 64-bit arithmetic so that the
+ * check itself never relies on signed integer overflow.
+ *
+ * Returns: 0 if the coefficients are safe, -EOVERFLOW otherwise
+ */
+int thermal_multi_sensor_validate_coeff(int *coeff, int count, int offset)
+{
+	s64 max_accumulated_temp = 0;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		/*
+		 * The running sum is kept within the int range by the check
+		 * below, so adding one more 64-bit product cannot overflow.
+		 */
+		max_accumulated_temp += (s64)TJ_MAX * coeff[i];
+		if (!multi_sensor_temp_fits_int(max_accumulated_temp))
+			return -EOVERFLOW;
+	}
+
+	max_accumulated_temp += offset;
+	if (!multi_sensor_temp_fits_int(max_accumulated_temp))
+		return -EOVERFLOW;
+
+	return 0;
+}
+
+/**
+ * thermal_multi_sensor_find_tz() - Look up a thermal zone by its type
+ * @type: The type of the thermal zone to look for
+ *
+ * Wrapper around thermal_zone_get_zone_by_name() that reports a failed lookup
+ * as NULL instead of an ERR_PTR.
+ *
+ * Returns: a pointer to the thermal zone, or NULL if the lookup failed
+ */
+struct thermal_zone_device *thermal_multi_sensor_find_tz(const char *type)
+{
+	struct thermal_zone_device *found = thermal_zone_get_zone_by_name(type);
+
+	return IS_ERR(found) ? NULL : found;
+}
+
+/**
+ * thermal_multi_sensor_alloc_ops() - Allocate the ops of a multi sensor zone
+ *
+ * The returned struct thermal_zone_device_ops is zeroed except for get_temp,
+ * which is set to the multi sensor implementation. The caller may fill in the
+ * other callbacks before registering the thermal zone.
+ *
+ * Returns: a pointer to the allocated struct thermal_zone_device_ops, or
+ * ERR_PTR(-ENOMEM) if the allocation failed. Caller must check the return
+ * value with the IS_ERR*() helpers.
+ */
+struct thermal_zone_device_ops *thermal_multi_sensor_alloc_ops(void)
+{
+	struct thermal_zone_device_ops *multi_ops;
+
+	multi_ops = kzalloc(sizeof(*multi_ops), GFP_KERNEL);
+	if (multi_ops) {
+		multi_ops->get_temp = multi_sensor_get_temp_max;
+		return multi_ops;
+	}
+
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * thermal_multi_sensor_tz_alloc() - Create a multi sensor thermal zone
+ * @type: the thermal zone device type
+ * @trips: a pointer to an array of thermal trips
+ * @num_trips: the number of trip points the thermal zone supports
+ * @ops: thermal zone device callbacks
+ * @passive_delay: number of milliseconds to wait between polls when
+ *		   performing passive cooling
+ * @polling_delay: number of milliseconds to wait between polls when checking
+ *		   whether trip points have been crossed (0 for interrupt
+ *		   driven systems)
+ *
+ * If a thermal zone of type @type can already be found, it is returned as is
+ * and nothing is created. Otherwise a multi sensor thermal zone is allocated,
+ * registered with hwmon disabled, a slope of 1 and an offset of 0, and added
+ * to the list of multi sensor thermal zones.
+ *
+ * Sensors are attached with thermal_multi_sensor_register().
+ * thermal_multi_sensor_unregister() detaches them and releases the thermal
+ * zone once its last sensor is gone.
+ *
+ * Return: a pointer to the struct thermal_zone_device or, in case of error,
+ * an ERR_PTR. Caller must check the return value with the IS_ERR*() helpers.
+ */
+struct thermal_zone_device *thermal_multi_sensor_tz_alloc(const char *type,
+							  struct thermal_trip *trips,
+							  int num_trips,
+							  struct thermal_zone_device_ops *ops,
+							  int passive_delay, int polling_delay)
+{
+	struct thermal_zone_params params = {
+		.no_hwmon = true,
+		.slope = 1,
+		.offset = 0,
+	};
+	struct multi_sensor_thermal_zone *zone;
+	struct thermal_zone_device *tz;
+
+	mutex_lock(&multi_tz_mutex);
+
+	/* A zone of this type already exists: hand it back unchanged */
+	tz = thermal_zone_get_zone_by_name(type);
+	if (!IS_ERR(tz))
+		goto out_unlock;
+
+	zone = kzalloc(sizeof(*zone), GFP_KERNEL);
+	if (!zone) {
+		tz = ERR_PTR(-ENOMEM);
+		goto out_unlock;
+	}
+
+	mutex_init(&zone->sensors_lock);
+	INIT_LIST_HEAD(&zone->sensors);
+
+	tz = thermal_zone_device_register_with_trips(type, trips, num_trips,
+						     zone, ops, &params,
+						     passive_delay, polling_delay);
+	if (IS_ERR(tz)) {
+		kfree(zone);
+		goto out_unlock;
+	}
+
+	zone->tz = tz;
+	list_add(&zone->node, &multi_tz_list);
+
+out_unlock:
+	mutex_unlock(&multi_tz_mutex);
+	return tz;
+}
+
+/**
+ * thermal_multi_sensor_tz_free() - Drop all sensors of a zone and release it
+ * @tz: Pointer to the multi sensor thermal zone to release
+ *
+ * This function must not be used except on error path to correctly
+ * release all the allocated resources.
+ * Use thermal_multi_sensor_unregister() to unregister a sensor and
+ * release a thermal zone that is not used anymore.
+ *
+ * The zone is removed from the list of multi sensor thermal zones without
+ * taking multi_tz_mutex here; thermal_multi_sensor_unregister() calls this
+ * function with that mutex already held.
+ *
+ * NOTE(review): the ops passed to thermal_multi_sensor_tz_alloc() are not
+ * released here. tz->ops is a member embedded in @tz, not a separate
+ * allocation, so it must not be passed to kfree(). Confirm who owns the buffer
+ * returned by thermal_multi_sensor_alloc_ops().
+ */
+void thermal_multi_sensor_tz_free(struct thermal_zone_device *tz)
+{
+	struct multi_sensor_thermal_zone *multi_tz = tz->devdata;
+	struct sensor_interface *sensor, *tmp;
+
+	/* Same locking as the other users of the sensors list */
+	mutex_lock(&multi_tz->sensors_lock);
+	list_for_each_entry_safe(sensor, tmp, &multi_tz->sensors, node) {
+		list_del(&sensor->node);
+		kfree(sensor);
+	}
+	mutex_unlock(&multi_tz->sensors_lock);
+
+	thermal_zone_device_unregister(tz);
+	list_del(&multi_tz->node);
+	kfree(multi_tz);
+}
+
+/**
+ * thermal_multi_sensor_register() - Attach a sensor to a multi sensor zone
+ * @tz: The multi sensor thermal zone
+ * @sensor_tz: The thermal zone of the sensor to attach
+ * @coeff: The coefficient to apply to the temperature returned by the sensor
+ *
+ * Adds @sensor_tz to the list of sensors aggregated by @tz and then enables
+ * @tz. The return value of thermal_zone_device_enable() is not checked.
+ *
+ * Returns: 0 on success, -ENOMEM if the sensor entry cannot be allocated
+ */
+int thermal_multi_sensor_register(struct thermal_zone_device *tz,
+				  struct thermal_zone_device *sensor_tz,
+				  int coeff)
+{
+	struct multi_sensor_thermal_zone *zone;
+	struct sensor_interface *entry;
+	int ret = -ENOMEM;
+
+	mutex_lock(&multi_tz_mutex);
+
+	zone = tz->devdata;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		goto out_unlock;
+
+	entry->tz = sensor_tz;
+	entry->coeff = coeff;
+
+	mutex_lock(&zone->sensors_lock);
+	list_add(&entry->node, &zone->sensors);
+	mutex_unlock(&zone->sensors_lock);
+
+	thermal_zone_device_enable(tz);
+	ret = 0;
+
+out_unlock:
+	mutex_unlock(&multi_tz_mutex);
+	return ret;
+}
+
+/**
+ * thermal_multi_sensor_unregister() - Detach a sensor from its multi sensor zones
+ * @sensor_tz: The thermal zone of the sensor to unregister
+ *
+ * Removes @sensor_tz from every multi sensor thermal zone it is attached to.
+ * A multi sensor thermal zone that loses its last sensor this way is released
+ * with thermal_multi_sensor_tz_free(). Zones that @sensor_tz was not attached
+ * to are left untouched, even if they have no sensor yet.
+ */
+void thermal_multi_sensor_unregister(struct thermal_zone_device *sensor_tz)
+{
+	struct multi_sensor_thermal_zone *multi_tz, *tmp_tz;
+	struct sensor_interface *sensor, *tmp;
+	bool removed, now_empty;
+
+	mutex_lock(&multi_tz_mutex);
+	list_for_each_entry_safe(multi_tz, tmp_tz, &multi_tz_list, node) {
+		removed = false;
+
+		mutex_lock(&multi_tz->sensors_lock);
+		list_for_each_entry_safe(sensor, tmp, &multi_tz->sensors, node) {
+			if (sensor->tz == sensor_tz) {
+				list_del(&sensor->node);
+				kfree(sensor);
+				removed = true;
+				break;
+			}
+		}
+		/* Sample emptiness while the list is still locked */
+		now_empty = list_empty(&multi_tz->sensors);
+		mutex_unlock(&multi_tz->sensors_lock);
+
+		/*
+		 * Only release a zone this sensor was actually removed from;
+		 * a freshly allocated zone without sensors must stay alive.
+		 */
+		if (removed && now_empty)
+			thermal_multi_sensor_tz_free(multi_tz->tz);
+	}
+	mutex_unlock(&multi_tz_mutex);
+}
diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h
index fc78bf3aead7..e4f6c4c5e6fd 100644
--- a/include/uapi/linux/thermal.h
+++ b/include/uapi/linux/thermal.h
@@ -16,6 +16,11 @@ enum thermal_trip_type {
THERMAL_TRIP_CRITICAL,
};
+/*
+ * Ways of aggregating the temperatures of several sensors.
+ * NOTE(review): only a maximum aggregation is implemented in thermal_multi.c
+ * (multi_sensor_get_temp_max) - confirm THERMAL_AGGR_AVG is still needed.
+ */
+enum thermal_aggregation_type {
+ THERMAL_AGGR_AVG = 0,
+ THERMAL_AGGR_MAX = 1,
+};
+
/* Adding event notification support elements */
#define THERMAL_GENL_FAMILY_NAME "thermal"
#define THERMAL_GENL_VERSION 0x01
--
2.44.1