2023-05-25 14:21:47

by Daniel Lezcano

[permalink] [raw]
Subject: [PATCH 4/8] thermal/core: Update the generic trip points

At this point, the generic trip points rework allows to create a
thermal zone with a fixed number of trip points. This usage satisfy
almost all of the existing drivers.

A few remaining drivers have a mechanism where the firmware updates
the trip points. But there is no such update mechanism for the generic
trip points, thus those drivers can not be converted to the generic
approach.

This patch provides a function 'thermal_zone_trips_update()' allowing
to change the trip points of a thermal zone.

At the same time, with the logic the trip points array is passed as a
parameter to the thermal zone at creation time, we make our own
private trip points array by copying the one passed as parameter.

Note, no code has been found where the trip points update leads to a
refresh of the trip points in sysfs, so it is very unlikey the number
of trip points changes. However, for the sake of consistency it would
be nicer to have the trip points being refreshed in sysfs also, but
that could be done in a separate set of changes.

Signed-off-by: Daniel Lezcano <[email protected]>
---
drivers/thermal/thermal_core.c | 40 ++++++++---------
drivers/thermal/thermal_core.h | 3 ++
drivers/thermal/thermal_trip.c | 78 ++++++++++++++++++++++++++++++++++
include/linux/thermal.h | 4 ++
4 files changed, 102 insertions(+), 23 deletions(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index afcd4197babd..3688b06401c8 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1224,32 +1224,11 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t
return ERR_PTR(-EINVAL);
}

- /*
- * Max trip count can't exceed 31 as the "mask >> num_trips" condition.
- * For example, shifting by 32 will result in compiler warning:
- * warning: right shift count >= width of type [-Wshift-count- overflow]
- *
- * Also "mask >> num_trips" will always be true with 32 bit shift.
- * E.g. mask = 0x80000000 for trip id 31 to be RW. Then
- * mask >> 32 = 0x80000000
- * This will result in failure for the below condition.
- *
- * Check will be true when the bit 31 of the mask is set.
- * 32 bit shift will cause overflow of 4 byte integer.
- */
- if (num_trips > (BITS_PER_TYPE(int) - 1) || num_trips < 0 || mask >> num_trips) {
- pr_err("Incorrect number of thermal trips\n");
- return ERR_PTR(-EINVAL);
- }
-
if (!ops) {
pr_err("Thermal zone device ops not defined\n");
return ERR_PTR(-EINVAL);
}

- if (num_trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp) && !trips)
- return ERR_PTR(-EINVAL);
-
if (!thermal_class)
return ERR_PTR(-ENODEV);

@@ -1283,8 +1262,22 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t
tz->ops = ops;
tz->device.class = thermal_class;
tz->devdata = devdata;
- tz->trips = trips;
- tz->num_trips = num_trips;
+
+ if (trips) {
+ result = __thermal_zone_trips_update(tz, trips, num_trips, mask);
+ if (result)
+ goto remove_id;
+ } else {
+ /*
+ * Legacy trip point handling
+ */
+ if ((!tz->ops->get_trip_type || !tz->ops->get_trip_temp) && num_trips) {
+ result = -EINVAL;
+ goto remove_id;
+ }
+
+ tz->num_trips = num_trips;
+ }

thermal_set_delay_jiffies(&tz->passive_delay_jiffies, passive_delay);
thermal_set_delay_jiffies(&tz->polling_delay_jiffies, polling_delay);
@@ -1451,6 +1444,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
mutex_unlock(&tz->lock);

kfree(tz->tzp);
+ kfree(tz->trips);

put_device(&tz->device);

diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 84ada34ff079..c27a9930f904 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -125,6 +125,9 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
void __thermal_zone_set_trips(struct thermal_zone_device *tz);
int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
struct thermal_trip *trip);
+int __thermal_zone_trips_update(struct thermal_zone_device *tz,
+ struct thermal_trip *trips,
+ int num_trips, int mask);
int __thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp);

/* sysfs I/F */
diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c
index 61d927c35776..171b1902c01c 100644
--- a/drivers/thermal/thermal_trip.c
+++ b/drivers/thermal/thermal_trip.c
@@ -7,6 +7,8 @@
*
* Thermal trips handling
*/
+#include <linux/slab.h>
+
#define THERMAL_CORE_SUBSYS
#include "thermal_core.h"

@@ -181,3 +183,79 @@ int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id,

return 0;
}
+
+int __thermal_zone_trips_update(struct thermal_zone_device *tz, struct thermal_trip *trips,
+ int num_trips, int mask)
+{
+ struct thermal_trip *new_trips;
+
+ /*
+ * Legacy trip point handling is incompatible with this
+ * function
+ */
+ if (tz->ops->get_trip_type || tz->ops->get_trip_temp) {
+ pr_err("Legacy trip points use incompatible function '%s'\n", __func__);
+ return -ENOSYS;
+ }
+
+ /*
+ * Max trip count can't exceed 31 as the "mask >> num_trips" condition.
+ * For example, shifting by 32 will result in compiler warning:
+ * warning: right shift count >= width of type [-Wshift-count- overflow]
+ *
+ * Also "mask >> num_trips" will always be true with 32 bit shift.
+ * E.g. mask = 0x80000000 for trip id 31 to be RW. Then
+ * mask >> 32 = 0x80000000
+ * This will result in failure for the below condition.
+ *
+ * Check will be true when the bit 31 of the mask is set.
+ * 32 bit shift will cause overflow of 4 byte integer.
+ */
+ if (num_trips > (BITS_PER_TYPE(int) - 1) || num_trips < 0 || mask >> num_trips) {
+ pr_err("Incorrect number of thermal trips\n");
+ return -EINVAL;
+ }
+
+ /*
+ * New generic trip point handling
+ */
+ if (num_trips > 0 && !trips) {
+ pr_err("Inconsistent parameters\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Allocate our private trip points array structure
+ */
+ new_trips = kmemdup(trips, sizeof(*trips) * num_trips, GFP_KERNEL);
+ if (!new_trips)
+ return -ENOMEM;
+
+ /*
+ * Newly allocated thermal zone will have the 'trips' field
+ * NULL, kfree() is immune against that
+ */
+ kfree(tz->trips);
+ tz->trips = new_trips;
+ tz->num_trips = num_trips;
+
+ return 0;
+}
+
+int thermal_zone_trips_update(struct thermal_zone_device *tz, struct thermal_trip *trips,
+ int num_trips, int mask)
+{
+ int ret;
+
+ mutex_lock(&tz->lock);
+ ret = __thermal_zone_trips_update(tz, trips, num_trips, mask);
+ mutex_unlock(&tz->lock);
+
+ if (ret)
+ return ret;
+
+ __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(thermal_zone_trips_update);
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 87837094d549..83937256a01c 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -291,6 +291,10 @@ int thermal_zone_get_num_trips(struct thermal_zone_device *tz);

int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp);

+int thermal_zone_trips_update(struct thermal_zone_device *tz,
+ struct thermal_trip *trips,
+ int num_trips, int mask);
+
#ifdef CONFIG_THERMAL_ACPI
int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp);
int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp);
--
2.34.1



2023-06-20 12:10:30

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH 4/8] thermal/core: Update the generic trip points

On Thu, May 25, 2023 at 4:02 PM Daniel Lezcano
<[email protected]> wrote:
>
> At this point, the generic trip points rework allows to create a
> thermal zone with a fixed number of trip points. This usage satisfy
> almost all of the existing drivers.
>
> A few remaining drivers have a mechanism where the firmware updates
> the trip points. But there is no such update mechanism for the generic
> trip points, thus those drivers can not be converted to the generic
> approach.
>
> This patch provides a function 'thermal_zone_trips_update()' allowing
> to change the trip points of a thermal zone.
>
> At the same time, with the logic the trip points array is passed as a
> parameter to the thermal zone at creation time, we make our own
> private trip points array by copying the one passed as parameter.

So the design seems to require the caller to create a new array of
trip points and pass it to thermal_zone_trips_update(), so it can
replace the zone's trips array with it.

If only one trip point changes and there are multiple defined, this is
rather not efficient.

Do you want to prevent the core from using stale trip points this way?
If so, it should be stated here.

Moreover, at least in the cases when num_trips doesn't change, it
might be more efficient to walk the new trips[] array and only copy
the ones that have changed over their old versions.

I am also not sure if this interface is going to be convenient from
the user's perspective, especially if the trips get sorted by the core
(in the future). They would need to recreate the entire trips array
every time from scratch, even if only one trip point changes, which
means quite a bit of churn for thermal zones with many trip points.

It might be better to allow them to update trips in place and notify
the core about the change, all under the zone lock to prevent the core
from using trips simultaneously.

And arguably, changing num_trips would be questionable due to the
sysfs consistency reasons mentioned below.

> Note, no code has been found where the trip points update leads to a
> refresh of the trip points in sysfs, so it is very unlikey the number
> of trip points changes. However, for the sake of consistency it would
> be nicer to have the trip points being refreshed in sysfs also, but
> that could be done in a separate set of changes.

So at this point user space has already enumerated the trip points, so
it may fail if some of them go away or it may not be able to use any
new trip points appearing in sysfs.

For this reason, until there is a way to notify user space about the
need to re-enumerate trip points (and user space indicates support for
it), the only trip point property that may change in sysfs is the
temperature.

> Signed-off-by: Daniel Lezcano <[email protected]>
> ---
> drivers/thermal/thermal_core.c | 40 ++++++++---------
> drivers/thermal/thermal_core.h | 3 ++
> drivers/thermal/thermal_trip.c | 78 ++++++++++++++++++++++++++++++++++
> include/linux/thermal.h | 4 ++
> 4 files changed, 102 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index afcd4197babd..3688b06401c8 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -1224,32 +1224,11 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t
> return ERR_PTR(-EINVAL);
> }
>
> - /*
> - * Max trip count can't exceed 31 as the "mask >> num_trips" condition.
> - * For example, shifting by 32 will result in compiler warning:
> - * warning: right shift count >= width of type [-Wshift-count- overflow]
> - *
> - * Also "mask >> num_trips" will always be true with 32 bit shift.
> - * E.g. mask = 0x80000000 for trip id 31 to be RW. Then
> - * mask >> 32 = 0x80000000
> - * This will result in failure for the below condition.
> - *
> - * Check will be true when the bit 31 of the mask is set.
> - * 32 bit shift will cause overflow of 4 byte integer.
> - */
> - if (num_trips > (BITS_PER_TYPE(int) - 1) || num_trips < 0 || mask >> num_trips) {
> - pr_err("Incorrect number of thermal trips\n");
> - return ERR_PTR(-EINVAL);
> - }
> -
> if (!ops) {
> pr_err("Thermal zone device ops not defined\n");
> return ERR_PTR(-EINVAL);
> }
>
> - if (num_trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp) && !trips)
> - return ERR_PTR(-EINVAL);
> -
> if (!thermal_class)
> return ERR_PTR(-ENODEV);
>
> @@ -1283,8 +1262,22 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t
> tz->ops = ops;
> tz->device.class = thermal_class;
> tz->devdata = devdata;
> - tz->trips = trips;
> - tz->num_trips = num_trips;
> +
> + if (trips) {
> + result = __thermal_zone_trips_update(tz, trips, num_trips, mask);
> + if (result)
> + goto remove_id;
> + } else {
> + /*
> + * Legacy trip point handling
> + */
> + if ((!tz->ops->get_trip_type || !tz->ops->get_trip_temp) && num_trips) {
> + result = -EINVAL;
> + goto remove_id;
> + }
> +
> + tz->num_trips = num_trips;
> + }
>
> thermal_set_delay_jiffies(&tz->passive_delay_jiffies, passive_delay);
> thermal_set_delay_jiffies(&tz->polling_delay_jiffies, polling_delay);
> @@ -1451,6 +1444,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
> mutex_unlock(&tz->lock);
>
> kfree(tz->tzp);
> + kfree(tz->trips);
>
> put_device(&tz->device);
>
> diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
> index 84ada34ff079..c27a9930f904 100644
> --- a/drivers/thermal/thermal_core.h
> +++ b/drivers/thermal/thermal_core.h
> @@ -125,6 +125,9 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
> void __thermal_zone_set_trips(struct thermal_zone_device *tz);
> int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
> struct thermal_trip *trip);
> +int __thermal_zone_trips_update(struct thermal_zone_device *tz,
> + struct thermal_trip *trips,
> + int num_trips, int mask);
> int __thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp);
>
> /* sysfs I/F */
> diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c
> index 61d927c35776..171b1902c01c 100644
> --- a/drivers/thermal/thermal_trip.c
> +++ b/drivers/thermal/thermal_trip.c
> @@ -7,6 +7,8 @@
> *
> * Thermal trips handling
> */
> +#include <linux/slab.h>
> +
> #define THERMAL_CORE_SUBSYS
> #include "thermal_core.h"
>
> @@ -181,3 +183,79 @@ int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id,
>
> return 0;
> }
> +
> +int __thermal_zone_trips_update(struct thermal_zone_device *tz, struct thermal_trip *trips,
> + int num_trips, int mask)
> +{
> + struct thermal_trip *new_trips;
> +
> + /*
> + * Legacy trip point handling is incompatible with this
> + * function
> + */
> + if (tz->ops->get_trip_type || tz->ops->get_trip_temp) {
> + pr_err("Legacy trip points use incompatible function '%s'\n", __func__);
> + return -ENOSYS;
> + }
> +
> + /*
> + * Max trip count can't exceed 31 as the "mask >> num_trips" condition.
> + * For example, shifting by 32 will result in compiler warning:
> + * warning: right shift count >= width of type [-Wshift-count- overflow]
> + *
> + * Also "mask >> num_trips" will always be true with 32 bit shift.
> + * E.g. mask = 0x80000000 for trip id 31 to be RW. Then
> + * mask >> 32 = 0x80000000
> + * This will result in failure for the below condition.
> + *
> + * Check will be true when the bit 31 of the mask is set.
> + * 32 bit shift will cause overflow of 4 byte integer.
> + */
> + if (num_trips > (BITS_PER_TYPE(int) - 1) || num_trips < 0 || mask >> num_trips) {
> + pr_err("Incorrect number of thermal trips\n");
> + return -EINVAL;
> + }
> +
> + /*
> + * New generic trip point handling
> + */
> + if (num_trips > 0 && !trips) {
> + pr_err("Inconsistent parameters\n");
> + return -EINVAL;
> + }
> +
> + /*
> + * Allocate our private trip points array structure
> + */
> + new_trips = kmemdup(trips, sizeof(*trips) * num_trips, GFP_KERNEL);
> + if (!new_trips)
> + return -ENOMEM;
> +
> + /*
> + * Newly allocated thermal zone will have the 'trips' field
> + * NULL, kfree() is immune against that
> + */
> + kfree(tz->trips);
> + tz->trips = new_trips;
> + tz->num_trips = num_trips;
> +
> + return 0;
> +}
> +
> +int thermal_zone_trips_update(struct thermal_zone_device *tz, struct thermal_trip *trips,
> + int num_trips, int mask)
> +{
> + int ret;
> +
> + mutex_lock(&tz->lock);
> + ret = __thermal_zone_trips_update(tz, trips, num_trips, mask);
> + mutex_unlock(&tz->lock);
> +
> + if (ret)
> + return ret;
> +
> + __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED);

This is called under tz->lock in other places AFAICS. Why not here?

> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(thermal_zone_trips_update);
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index 87837094d549..83937256a01c 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -291,6 +291,10 @@ int thermal_zone_get_num_trips(struct thermal_zone_device *tz);
>
> int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp);
>
> +int thermal_zone_trips_update(struct thermal_zone_device *tz,
> + struct thermal_trip *trips,
> + int num_trips, int mask);
> +
> #ifdef CONFIG_THERMAL_ACPI
> int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp);
> int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp);
> --

2023-06-20 18:51:07

by Daniel Lezcano

[permalink] [raw]
Subject: Re: [PATCH 4/8] thermal/core: Update the generic trip points


Hi Rafael,

thanks for the comments


On 20/06/2023 13:28, Rafael J. Wysocki wrote:
> On Thu, May 25, 2023 at 4:02 PM Daniel Lezcano
> <[email protected]> wrote:
>>
>> At this point, the generic trip points rework allows to create a
>> thermal zone with a fixed number of trip points. This usage satisfy
>> almost all of the existing drivers.
>>
>> A few remaining drivers have a mechanism where the firmware updates
>> the trip points. But there is no such update mechanism for the generic
>> trip points, thus those drivers can not be converted to the generic
>> approach.
>>
>> This patch provides a function 'thermal_zone_trips_update()' allowing
>> to change the trip points of a thermal zone.
>>
>> At the same time, with the logic the trip points array is passed as a
>> parameter to the thermal zone at creation time, we make our own
>> private trip points array by copying the one passed as parameter.
>
> So the design seems to require the caller to create a new array of
> trip points and pass it to thermal_zone_trips_update(), so it can
> replace the zone's trips array with it.
>
> If only one trip point changes and there are multiple defined, this is
> rather not efficient.

This update is only for replacing the current trip array when one or
several trip points are added or removed. We can see that in one or two
drivers only.

This function is supposed to be called rarely (and I doubt there is
really a lot of hardware sending notification to add/remove trip points).

For changing a trip point property like its temperature or its
hysteresis, we keep the usual set_trip_point() function.

> Do you want to prevent the core from using stale trip points this way?
> If so, it should be stated here.

No, that will be a side effect. We can put this point apart, it will be
addressed in a cleanup series after everything is converted to the
generic trip points.


> Moreover, at least in the cases when num_trips doesn't change, it
> might be more efficient to walk the new trips[] array and only copy
> the ones that have changed over their old versions.

IMO, that is over-engineered, especially for dedicating this
optimization for a very few drivers and ultra rare usages.


> I am also not sure if this interface is going to be convenient from
> the user's perspective, especially if the trips get sorted by the core
> (in the future). They would need to recreate the entire trips array
> every time from scratch, even if only one trip point changes, which
> means quite a bit of churn for thermal zones with many trip points.

Actually, the driver is not supposed to deal with the array. It can
create the array on the stack, pass it to the
thermal_zone_device_register_with_trips() function and forget about it.

The trip points array should not be used by a driver anymore.


> It might be better to allow them to update trips in place and notify
> the core about the change, all under the zone lock to prevent the core
> from using trips simultaneously.

I'm not sure to understand. The core code is called with this function
and takes the lock.

> And arguably, changing num_trips would be questionable due to the
> sysfs consistency reasons mentioned below.

[ ... ]

>> Note, no code has been found where the trip points update leads to a
>> refresh of the trip points in sysfs, so it is very unlikey the number
>> of trip points changes. However, for the sake of consistency it would
>> be nicer to have the trip points being refreshed in sysfs also, but
>> that could be done in a separate set of changes.
>
> So at this point user space has already enumerated the trip points, so
> it may fail if some of them go away or it may not be able to use any
> new trip points appearing in sysfs.

Yes, that is why I think the adding/removal of the trip points was never
really supported. I would be very curious to see a platform with such a
feature.

> For this reason, until there is a way to notify user space about the
> need to re-enumerate trip points (and user space indicates support for
> it), the only trip point property that may change in sysfs is the
> temperature.

The userspace can be notified when there is a change with:

THERMAL_GENL_EVENT_TZ_TRIP_CHANGE
THERMAL_GENL_EVENT_TZ_TRIP_ADD
THERMAL_GENL_EVENT_TZ_TRIP_DELETE

The last two ones are not implemented today but that could be done after
as that would be a new feature.

Let me summarize the situation:

- The trip point crossing events are not correctly detected because of
how they are handled and we need to sort them out.

- In order to sort them out, we need to convert the drivers to the
generic trip point and remove all those get_trip_* | set_trip_* ops

- Almost all the drivers are converted except the ACPI thermal and the
intel_soc_dts_iosf drivers which are blocking the feature

- The ACPI thermal driver can potentially add or remove a trip point
but we are not sure that can happen

- We need to decorrelate the trip id and the array index for the ACPI
thermal driver

The generic trip points change is a big chunk of work and I would like
to have some progress to fix the trip crossing detection along with the
removal of the resulting dead code.

Given there may not be a real usage of the thermal trip number update,
can we stay simple and keep the proposed change but forcing the same
number of trip points ?

We can then improve the existing code if it is really needed


--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog


2023-06-23 18:32:05

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH 4/8] thermal/core: Update the generic trip points

On Tue, Jun 20, 2023 at 8:35 PM Daniel Lezcano
<[email protected]> wrote:
>
>
> Hi Rafael,
>
> thanks for the comments
>
>
> On 20/06/2023 13:28, Rafael J. Wysocki wrote:
> > On Thu, May 25, 2023 at 4:02 PM Daniel Lezcano
> > <[email protected]> wrote:
> >>
> >> At this point, the generic trip points rework allows to create a
> >> thermal zone with a fixed number of trip points. This usage satisfy
> >> almost all of the existing drivers.
> >>
> >> A few remaining drivers have a mechanism where the firmware updates
> >> the trip points. But there is no such update mechanism for the generic
> >> trip points, thus those drivers can not be converted to the generic
> >> approach.
> >>
> >> This patch provides a function 'thermal_zone_trips_update()' allowing
> >> to change the trip points of a thermal zone.
> >>
> >> At the same time, with the logic the trip points array is passed as a
> >> parameter to the thermal zone at creation time, we make our own
> >> private trip points array by copying the one passed as parameter.
> >
> > So the design seems to require the caller to create a new array of
> > trip points and pass it to thermal_zone_trips_update(), so it can
> > replace the zone's trips array with it.
> >
> > If only one trip point changes and there are multiple defined, this is
> > rather not efficient.
>
> This update is only for replacing the current trip array when one or
> several trip points are added or removed. We can see that in one or two
> drivers only.

Well, I'm not sure about this adding/removing idea, because of the
sysfs issue mentioned below.

Surely, some trip points can be made invalid by setting their
temperatures to the "invalid" value, but they cannot be removed,
because they are already represented in sysfs and may have been
enumerated by user space which will get confused by removing them.

Conversely, if new trip points are added, they will not appear in
sysfs and so user space will never become aware of them.

> This function is supposed to be called rarely (and I doubt there is
> really a lot of hardware sending notification to add/remove trip points).
>
> For changing a trip point property like its temperature or its
> hysteresis, we keep the usual set_trip_point() function.

Do you mean the ->set_trip_temp/hyst() zone callback?

But isn't that to be used by the core in order to notify the driver of
a change, rather than the other way around?

Here, we have a situation where the driver needs to let the core know
that something has changed.

> > Do you want to prevent the core from using stale trip points this way?
> > If so, it should be stated here.
>
> No, that will be a side effect. We can put this point apart, it will be
> addressed in a cleanup series after everything is converted to the
> generic trip points.

I think we need to talk about the design. It looks like you have one
in mind, but I'm not really sure what it is.

>
> > Moreover, at least in the cases when num_trips doesn't change, it
> > might be more efficient to walk the new trips[] array and only copy
> > the ones that have changed over their old versions.
>
> IMO, that is over-engineered, especially for dedicating this
> optimization for a very few drivers and ultra rare usages.

I'm not sure.

One thing to remember is that, in general, the core and the drivers
need to communicate both ways regarding trip points. For example,
when a trip point is updated via sysfs, the core needs to tell the
driver about that. Conversely, if a trip point is updated by the
platform firmware (say), the driver needs to tell the core about that.

> > I am also not sure if this interface is going to be convenient from
> > the user's perspective, especially if the trips get sorted by the core
> > (in the future). They would need to recreate the entire trips array
> > every time from scratch, even if only one trip point changes, which
> > means quite a bit of churn for thermal zones with many trip points.
>
> Actually, the driver is not supposed to deal with the array. It can
> create the array on the stack, pass it to the
> thermal_zone_device_register_with_trips() function and forget about it.
>
> The trip points array should not be used by a driver anymore.

Well, say that the core invokes ->set_trip_temp() for a certain trip
point. How is the driver going to know which trip point this is about
if it cannot access the trip points table in the zone?

> > It might be better to allow them to update trips in place and notify
> > the core about the change, all under the zone lock to prevent the core
> > from using trips simultaneously.
>
> I'm not sure to understand. The core code is called with this function
> and takes the lock.
>
> > And arguably, changing num_trips would be questionable due to the
> > sysfs consistency reasons mentioned below.
>
> [ ... ]
>
> >> Note, no code has been found where the trip points update leads to a
> >> refresh of the trip points in sysfs, so it is very unlikey the number
> >> of trip points changes. However, for the sake of consistency it would
> >> be nicer to have the trip points being refreshed in sysfs also, but
> >> that could be done in a separate set of changes.
> >
> > So at this point user space has already enumerated the trip points, so
> > it may fail if some of them go away or it may not be able to use any
> > new trip points appearing in sysfs.
>
> Yes, that is why I think the adding/removal of the trip points was never
> really supported. I would be very curious to see a platform with such a
> feature.
>
> > For this reason, until there is a way to notify user space about the
> > need to re-enumerate trip points (and user space indicates support for
> > it), the only trip point property that may change in sysfs is the
> > temperature.
>
> The userspace can be notified when there is a change with:
>
> THERMAL_GENL_EVENT_TZ_TRIP_CHANGE
> THERMAL_GENL_EVENT_TZ_TRIP_ADD
> THERMAL_GENL_EVENT_TZ_TRIP_DELETE
>
> The last two ones are not implemented today but that could be done after
> as that would be a new feature.
>
> Let me summarize the situation:
>
> - The trip point crossing events are not correctly detected because of
> how they are handled and we need to sort them out.

Sure.

> - In order to sort them out, we need to convert the drivers to the
> generic trip point and remove all those get_trip_* | set_trip_* ops

IIUC, there are cases when the trip point temperature may be set via
sysfs. In some of them, the core needs to tell the driver what
temperature to set for the trip, so the hardware or the platform
notifies it of excursions. The ->set_* callbacks are still going to
be needed for that if I'm not mistaken.

> - Almost all the drivers are converted except the ACPI thermal and the
> intel_soc_dts_iosf drivers which are blocking the feature

Sorry about that. Hopefully, I'll have some time to sort this out
during the next couple of weeks.

I still would like to discuss the design related to that, though.

> - The ACPI thermal driver can potentially add or remove a trip point
> but we are not sure that can happen

It may as per the spec, but whether or not this case needs to be
handled is a good question.

This depends on whether or not user space is prepared to deal with
trip points appearing and going away. I suspect that it isn't.

> - We need to decorrelate the trip id and the array index for the ACPI
> thermal driver

That should be doable.

> The generic trip points change is a big chunk of work and I would like
> to have some progress to fix the trip crossing detection along with the
> removal of the resulting dead code.
>
> Given there may not be a real usage of the thermal trip number update,
> can we stay simple and keep the proposed change but forcing the same
> number of trip points ?

I suppose so.

> We can then improve the existing code if it is really needed

2023-06-25 20:51:12

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH 4/8] thermal/core: Update the generic trip points

On Thu, May 25, 2023 at 4:02 PM Daniel Lezcano
<[email protected]> wrote:
>
> At this point, the generic trip points rework allows to create a
> thermal zone with a fixed number of trip points. This usage satisfy
> almost all of the existing drivers.
>
> A few remaining drivers have a mechanism where the firmware updates
> the trip points. But there is no such update mechanism for the generic
> trip points, thus those drivers can not be converted to the generic
> approach.
>
> This patch provides a function 'thermal_zone_trips_update()' allowing
> to change the trip points of a thermal zone.
>
> At the same time, with the logic the trip points array is passed as a
> parameter to the thermal zone at creation time, we make our own
> private trip points array by copying the one passed as parameter.
>
> Note, no code has been found where the trip points update leads to a
> refresh of the trip points in sysfs, so it is very unlikey the number
> of trip points changes. However, for the sake of consistency it would
> be nicer to have the trip points being refreshed in sysfs also, but
> that could be done in a separate set of changes.
>
> Signed-off-by: Daniel Lezcano <[email protected]>
> ---
> drivers/thermal/thermal_core.c | 40 ++++++++---------
> drivers/thermal/thermal_core.h | 3 ++
> drivers/thermal/thermal_trip.c | 78 ++++++++++++++++++++++++++++++++++
> include/linux/thermal.h | 4 ++
> 4 files changed, 102 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index afcd4197babd..3688b06401c8 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -1224,32 +1224,11 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t
> return ERR_PTR(-EINVAL);
> }
>
> - /*
> - * Max trip count can't exceed 31 as the "mask >> num_trips" condition.
> - * For example, shifting by 32 will result in compiler warning:
> - * warning: right shift count >= width of type [-Wshift-count- overflow]
> - *
> - * Also "mask >> num_trips" will always be true with 32 bit shift.
> - * E.g. mask = 0x80000000 for trip id 31 to be RW. Then
> - * mask >> 32 = 0x80000000
> - * This will result in failure for the below condition.
> - *
> - * Check will be true when the bit 31 of the mask is set.
> - * 32 bit shift will cause overflow of 4 byte integer.
> - */
> - if (num_trips > (BITS_PER_TYPE(int) - 1) || num_trips < 0 || mask >> num_trips) {
> - pr_err("Incorrect number of thermal trips\n");
> - return ERR_PTR(-EINVAL);
> - }
> -
> if (!ops) {
> pr_err("Thermal zone device ops not defined\n");
> return ERR_PTR(-EINVAL);
> }
>
> - if (num_trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp) && !trips)
> - return ERR_PTR(-EINVAL);
> -
> if (!thermal_class)
> return ERR_PTR(-ENODEV);
>
> @@ -1283,8 +1262,22 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t
> tz->ops = ops;
> tz->device.class = thermal_class;
> tz->devdata = devdata;
> - tz->trips = trips;
> - tz->num_trips = num_trips;
> +
> + if (trips) {
> + result = __thermal_zone_trips_update(tz, trips, num_trips, mask);
> + if (result)
> + goto remove_id;
> + } else {
> + /*
> + * Legacy trip point handling
> + */
> + if ((!tz->ops->get_trip_type || !tz->ops->get_trip_temp) && num_trips) {
> + result = -EINVAL;
> + goto remove_id;
> + }
> +
> + tz->num_trips = num_trips;
> + }

Lest I forget, if I'm not mistaken, the above change would break the
int3403 driver that uses int340x_thermal_update_trips() to update trip
points in int3403_notify(), which handles notifications from the
platform firmware, and it updates them through the driver's private
pointer to the trips array used by the core with the assumption that
the core will notice the changes.

So it looks like at least this particular driver would need to be
updated before the $subject patch can be applied.

That said, I think that the ACPI thermal driver won't really need to
access the trips array after passing it to
thermal_zone_device_register_with_trips() and it may create a new
trips table every time the trip points are updated by the platform
firmware, but I'm not convinced about this design.