2024-04-25 13:57:59

by Rafael J. Wysocki

[permalink] [raw]
Subject: [PATCH v1 2/3] thermal/debugfs: Fix thermal zone locking

From: Rafael J. Wysocki <[email protected]>

With the current thermal zone locking arrangement in the debugfs code,
user space can open the "mitigations" file for a thermal zone before
the zone's debugfs pointer is set which will result in a NULL pointer
dereference in tze_seq_start().

While this could be addressed by putting the creation of the
"mitigations" file under thermal_dbg->lock, there is still a problem
with thermal_debug_tz_remove() that is not called under the thermal
zone lock and can run in parallel with the other functions accessing
the thermal zone's struct thermal_debugfs object. Then, it may
clear tz->debugfs after one of those functions has checked it and the
struct thermal_debugfs object may be freed prematurely.

To address both problems described above at once, use the observation
that thermal_debug_tz_trip_up(), thermal_debug_tz_trip_down(), and
thermal_debug_update_trip_stats() all run under the thermal zone
lock and because they all acquire thermal_dbg->lock for the thermal
zone's struct thermal_debugfs object, they must wait on that lock if it
is held while they are running and their callers (holding the thermal
zone lock) must wait along with them. This means that tze_seq_start()
may as well acquire tz->lock instead of thermal_dbg->lock and check the
struct thermal_debugfs object pointer retrieved from the thermal zone
against NULL under it.

Then, tz->lock can also be acquired by thermal_debug_tz_add() and
thermal_debug_tz_remove() to eliminate the race conditions at hand.

Rearrange the code in question accordingly and remove the
thermal_dbg->lock locking, which is now redundant, from it.

Fixes: 7ef01f228c9f ("thermal/debugfs: Add thermal debugfs information for mitigation episodes")
Cc: 6.8+ <[email protected]> # 6.8+
Signed-off-by: Rafael J. Wysocki <[email protected]>
---
drivers/thermal/thermal_debugfs.c | 60 +++++++++++++++++---------------------
1 file changed, 28 insertions(+), 32 deletions(-)

Index: linux-pm/drivers/thermal/thermal_debugfs.c
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_debugfs.c
+++ linux-pm/drivers/thermal/thermal_debugfs.c
@@ -551,8 +551,6 @@ void thermal_debug_tz_trip_up(struct the
if (!thermal_dbg)
return;

- mutex_lock(&thermal_dbg->lock);
-
tz_dbg = &thermal_dbg->tz_dbg;

/*
@@ -591,7 +589,7 @@ void thermal_debug_tz_trip_up(struct the
if (!tz_dbg->nr_trips) {
tze = thermal_debugfs_tz_event_alloc(tz, now);
if (!tze)
- goto unlock;
+ return;

list_add(&tze->node, &tz_dbg->tz_episodes);
}
@@ -613,9 +611,6 @@ void thermal_debug_tz_trip_up(struct the

tze = list_first_entry(&tz_dbg->tz_episodes, struct tz_episode, node);
tze->trip_stats[trip_id].timestamp = now;
-
-unlock:
- mutex_unlock(&thermal_dbg->lock);
}

void thermal_debug_tz_trip_down(struct thermal_zone_device *tz,
@@ -631,8 +626,6 @@ void thermal_debug_tz_trip_down(struct t
if (!thermal_dbg)
return;

- mutex_lock(&thermal_dbg->lock);
-
tz_dbg = &thermal_dbg->tz_dbg;

/*
@@ -643,7 +636,7 @@ void thermal_debug_tz_trip_down(struct t
* no mitigation mechanism yet at boot time.
*/
if (!tz_dbg->nr_trips)
- goto out;
+ return;

for (i = tz_dbg->nr_trips - 1; i >= 0; i--) {
if (tz_dbg->trips_crossed[i] == trip_id)
@@ -651,7 +644,7 @@ void thermal_debug_tz_trip_down(struct t
}

if (i < 0)
- goto out;
+ return;

tz_dbg->nr_trips--;

@@ -671,9 +664,6 @@ void thermal_debug_tz_trip_down(struct t
*/
if (!tz_dbg->nr_trips)
tze->duration = ktime_sub(now, tze->timestamp);
-
-out:
- mutex_unlock(&thermal_dbg->lock);
}

void thermal_debug_update_trip_stats(struct thermal_zone_device *tz)
@@ -686,12 +676,10 @@ void thermal_debug_update_trip_stats(str
if (!thermal_dbg)
return;

- mutex_lock(&thermal_dbg->lock);
-
tz_dbg = &thermal_dbg->tz_dbg;

if (!tz_dbg->nr_trips)
- goto out;
+ return;

tze = list_first_entry(&tz_dbg->tz_episodes, struct tz_episode, node);

@@ -704,19 +692,22 @@ void thermal_debug_update_trip_stats(str
trip_stats->avg += (tz->temperature - trip_stats->avg) /
++trip_stats->count;
}
-out:
- mutex_unlock(&thermal_dbg->lock);
}

static void *tze_seq_start(struct seq_file *s, loff_t *pos)
{
struct thermal_zone_device *tz = s->private;
- struct thermal_debugfs *thermal_dbg = tz->debugfs;
- struct tz_debugfs *tz_dbg = &thermal_dbg->tz_dbg;
+ struct thermal_debugfs *thermal_dbg;

- mutex_lock(&thermal_dbg->lock);
+ mutex_lock(&tz->lock);

- return seq_list_start(&tz_dbg->tz_episodes, *pos);
+ thermal_dbg = tz->debugfs;
+ if (!thermal_dbg) {
+ mutex_unlock(&tz->lock);
+ return NULL;
+ }
+
+ return seq_list_start(&thermal_dbg->tz_dbg.tz_episodes, *pos);
}

static void *tze_seq_next(struct seq_file *s, void *v, loff_t *pos)
@@ -731,9 +722,8 @@ static void *tze_seq_next(struct seq_fil
static void tze_seq_stop(struct seq_file *s, void *v)
{
struct thermal_zone_device *tz = s->private;
- struct thermal_debugfs *thermal_dbg = tz->debugfs;

- mutex_unlock(&thermal_dbg->lock);
+ mutex_unlock(&tz->lock);
}

static int tze_seq_show(struct seq_file *s, void *v)
@@ -826,23 +816,33 @@ void thermal_debug_tz_add(struct thermal

debugfs_create_file("mitigations", 0400, thermal_dbg->d_top, tz, &tze_fops);

+ mutex_lock(&tz->lock);
+
tz->debugfs = thermal_dbg;
+
+ mutex_unlock(&tz->lock);
}

void thermal_debug_tz_remove(struct thermal_zone_device *tz)
{
- struct thermal_debugfs *thermal_dbg = tz->debugfs;
+ struct thermal_debugfs *thermal_dbg;
struct tz_episode *tze, *tmp;
struct tz_debugfs *tz_dbg;
int *trips_crossed;

- if (!thermal_dbg)
+ mutex_lock(&tz->lock);
+
+ thermal_dbg = tz->debugfs;
+ if (!thermal_dbg) {
+ mutex_unlock(&tz->lock);
return;
+ }

- tz_dbg = &thermal_dbg->tz_dbg;
+ tz->debugfs = NULL;

- mutex_lock(&thermal_dbg->lock);
+ mutex_unlock(&tz->lock);

+ tz_dbg = &thermal_dbg->tz_dbg;
trips_crossed = tz_dbg->trips_crossed;

list_for_each_entry_safe(tze, tmp, &tz_dbg->tz_episodes, node) {
@@ -850,10 +850,6 @@ void thermal_debug_tz_remove(struct ther
kfree(tze);
}

- tz->debugfs = NULL;
-
- mutex_unlock(&thermal_dbg->lock);
-
thermal_debugfs_remove_id(thermal_dbg);
kfree(trips_crossed);
}





2024-04-25 15:48:12

by Rafael J. Wysocki

[permalink] [raw]
Subject: [Alternative][PATCH v1 2/3] thermal/debugfs: Fix two locking issues with thermal zone debug

From: Rafael J. Wysocki <[email protected]>

With the current thermal zone locking arrangement in the debugfs code,
user space can open the "mitigations" file for a thermal zone before
the zone's debugfs pointer is set which will result in a NULL pointer
dereference in tze_seq_start().

Moreover, thermal_debug_tz_remove() is not called under the thermal
zone lock, so it can run in parallel with the other functions accessing
the thermal zone's struct thermal_debugfs object. Then, it may clear
tz->debugfs after one of those functions has checked it and the
struct thermal_debugfs object may be freed prematurely.

To address the first problem, pass a pointer to the thermal zone's
struct thermal_debugfs object to debugfs_create_file() in
thermal_debug_tz_add() and make tze_seq_start(), tze_seq_next(),
tze_seq_stop(), and tze_seq_show() retrieve it from s->private
instead of a pointer to the thermal zone object. This will ensure
that tz_debugfs will be valid across the "mitigations" file accesses
until thermal_debugfs_remove_id() called by thermal_debug_tz_remove()
removes that file.

To address the second problem, use tz->lock in thermal_debug_tz_remove()
around the tz->debugfs value check (in case the same thermal zone is
removed at the same time in two different threads) and its reset to NULL.

Fixes: 7ef01f228c9f ("thermal/debugfs: Add thermal debugfs information for mitigation episodes")
Cc: 6.8+ <[email protected]> # 6.8+
Signed-off-by: Rafael J. Wysocki <[email protected]>
---

This is an alternative fix for the issues addressed by

https://lore.kernel.org/linux-pm/1888579.tdWV9SEqCh@kreacher/

and I slightly prefer it, because it is less intrusive and makes
the thermal zone debug code more consistent with the analogous code
for cdevs.

Accordingly, I've replaced the above with this patch in the
thermal-core-next branch in linux-pm.git.

---
drivers/thermal/thermal_debugfs.c | 34 ++++++++++++++++++++++------------
1 file changed, 22 insertions(+), 12 deletions(-)

Index: linux-pm/drivers/thermal/thermal_debugfs.c
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_debugfs.c
+++ linux-pm/drivers/thermal/thermal_debugfs.c
@@ -139,11 +139,13 @@ struct tz_episode {
* we keep track of the current position in the history array.
*
* @tz_episodes: a list of thermal mitigation episodes
+ * @tz: thermal zone this object belongs to
* @trips_crossed: an array of trip points crossed by id
* @nr_trips: the number of trip points currently being crossed
*/
struct tz_debugfs {
struct list_head tz_episodes;
+ struct thermal_zone_device *tz;
int *trips_crossed;
int nr_trips;
};
@@ -710,8 +712,7 @@ out:

static void *tze_seq_start(struct seq_file *s, loff_t *pos)
{
- struct thermal_zone_device *tz = s->private;
- struct thermal_debugfs *thermal_dbg = tz->debugfs;
+ struct thermal_debugfs *thermal_dbg = s->private;
struct tz_debugfs *tz_dbg = &thermal_dbg->tz_dbg;

mutex_lock(&thermal_dbg->lock);
@@ -721,8 +722,7 @@ static void *tze_seq_start(struct seq_fi

static void *tze_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
- struct thermal_zone_device *tz = s->private;
- struct thermal_debugfs *thermal_dbg = tz->debugfs;
+ struct thermal_debugfs *thermal_dbg = s->private;
struct tz_debugfs *tz_dbg = &thermal_dbg->tz_dbg;

return seq_list_next(v, &tz_dbg->tz_episodes, pos);
@@ -730,15 +730,15 @@ static void *tze_seq_next(struct seq_fil

static void tze_seq_stop(struct seq_file *s, void *v)
{
- struct thermal_zone_device *tz = s->private;
- struct thermal_debugfs *thermal_dbg = tz->debugfs;
+ struct thermal_debugfs *thermal_dbg = s->private;

mutex_unlock(&thermal_dbg->lock);
}

static int tze_seq_show(struct seq_file *s, void *v)
{
- struct thermal_zone_device *tz = s->private;
+ struct thermal_debugfs *thermal_dbg = s->private;
+ struct thermal_zone_device *tz = thermal_dbg->tz_dbg.tz;
struct thermal_trip_desc *td;
struct tz_episode *tze;
const char *type;
@@ -816,6 +816,8 @@ void thermal_debug_tz_add(struct thermal

tz_dbg = &thermal_dbg->tz_dbg;

+ tz_dbg->tz = tz;
+
tz_dbg->trips_crossed = kzalloc(sizeof(int) * tz->num_trips, GFP_KERNEL);
if (!tz_dbg->trips_crossed) {
thermal_debugfs_remove_id(thermal_dbg);
@@ -824,20 +826,30 @@ void thermal_debug_tz_add(struct thermal

INIT_LIST_HEAD(&tz_dbg->tz_episodes);

- debugfs_create_file("mitigations", 0400, thermal_dbg->d_top, tz, &tze_fops);
+ debugfs_create_file("mitigations", 0400, thermal_dbg->d_top,
+ thermal_dbg, &tze_fops);

tz->debugfs = thermal_dbg;
}

void thermal_debug_tz_remove(struct thermal_zone_device *tz)
{
- struct thermal_debugfs *thermal_dbg = tz->debugfs;
+ struct thermal_debugfs *thermal_dbg;
struct tz_episode *tze, *tmp;
struct tz_debugfs *tz_dbg;
int *trips_crossed;

- if (!thermal_dbg)
+ mutex_lock(&tz->lock);
+
+ thermal_dbg = tz->debugfs;
+ if (!thermal_dbg) {
+ mutex_unlock(&tz->lock);
return;
+ }
+
+ tz->debugfs = NULL;
+
+ mutex_unlock(&tz->lock);

tz_dbg = &thermal_dbg->tz_dbg;

@@ -850,8 +862,6 @@ void thermal_debug_tz_remove(struct ther
kfree(tze);
}

- tz->debugfs = NULL;
-
mutex_unlock(&thermal_dbg->lock);

thermal_debugfs_remove_id(thermal_dbg);




2024-04-25 22:20:39

by Lukasz Luba

[permalink] [raw]
Subject: Re: [Alternative][PATCH v1 2/3] thermal/debugfs: Fix two locking issues with thermal zone debug



On 4/25/24 16:47, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <[email protected]>
>
> With the current thermal zone locking arrangement in the debugfs code,
> user space can open the "mitigations" file for a thermal zone before
> the zone's debugfs pointer is set which will result in a NULL pointer
> dereference in tze_seq_start().
>
> Moreover, thermal_debug_tz_remove() is not called under the thermal
> zone lock, so can run in parallel with the other functions accessing
> the thermal zone's struct thermal_debugfs object. Then, it may clear
> tz->debugfs after one of those functions has checked it and the
> struct thermal_debugfs object may be freed prematurely.
>
> To address the first problem, pass a pointer to the thermal zone's
> struct thermal_debugfs object to debugfs_create_file() in
> thermal_debug_tz_add() and make tze_seq_start(), tze_seq_next(),
> tze_seq_stop(), and tze_seq_show() retrieve it from s->private
> instead of a pointer to the thermal zone object. This will ensure
> that tz_debugfs will be valid across the "mitigations" file accesses
> until thermal_debugfs_remove_id() called by thermal_debug_tz_remove()
> removes that file.
>
> To address the second problem, use tz->lock in thermal_debug_tz_remove()
> around the tz->debugfs value check (in case the same thermal zone is
> removed at the same time in two differet threads) and its reset to NULL.

s/differet/different/

>
> Fixes: 7ef01f228c9f ("thermal/debugfs: Add thermal debugfs information for mitigation episodes")
> Cc :6.8+ <[email protected]> # 6.8+
> Signed-off-by: Rafael J. Wysocki <[email protected]>
> ---
>
> This is an alternative fix for the issues addressed by
>
> https://lore.kernel.org/linux-pm/1888579.tdWV9SEqCh@kreacher/
>
> and I slightly prefer it, because it is less intrusive and makes
> the thermal zone debug code more consistent with the analogous code
> for cdevs.

I also prefer this one.

>
> Accordingly, I've replace the above with this patch in the
> thermal-core-next branch in linux-pm.git.
>
> ---
> drivers/thermal/thermal_debugfs.c | 34 ++++++++++++++++++++++------------
> 1 file changed, 22 insertions(+), 12 deletions(-)
>
> Index: linux-pm/drivers/thermal/thermal_debugfs.c
> ===================================================================
> --- linux-pm.orig/drivers/thermal/thermal_debugfs.c
> +++ linux-pm/drivers/thermal/thermal_debugfs.c
> @@ -139,11 +139,13 @@ struct tz_episode {
> * we keep track of the current position in the history array.
> *
> * @tz_episodes: a list of thermal mitigation episodes
> + * @tz: thermal zone this object belongs to
> * @trips_crossed: an array of trip points crossed by id
> * @nr_trips: the number of trip points currently being crossed
> */
> struct tz_debugfs {
> struct list_head tz_episodes;
> + struct thermal_zone_device *tz;
> int *trips_crossed;
> int nr_trips;
> };
> @@ -710,8 +712,7 @@ out:
>
> static void *tze_seq_start(struct seq_file *s, loff_t *pos)
> {
> - struct thermal_zone_device *tz = s->private;
> - struct thermal_debugfs *thermal_dbg = tz->debugfs;
> + struct thermal_debugfs *thermal_dbg = s->private;
> struct tz_debugfs *tz_dbg = &thermal_dbg->tz_dbg;
>
> mutex_lock(&thermal_dbg->lock);
> @@ -721,8 +722,7 @@ static void *tze_seq_start(struct seq_fi
>
> static void *tze_seq_next(struct seq_file *s, void *v, loff_t *pos)
> {
> - struct thermal_zone_device *tz = s->private;
> - struct thermal_debugfs *thermal_dbg = tz->debugfs;
> + struct thermal_debugfs *thermal_dbg = s->private;
> struct tz_debugfs *tz_dbg = &thermal_dbg->tz_dbg;
>
> return seq_list_next(v, &tz_dbg->tz_episodes, pos);
> @@ -730,15 +730,15 @@ static void *tze_seq_next(struct seq_fil
>
> static void tze_seq_stop(struct seq_file *s, void *v)
> {
> - struct thermal_zone_device *tz = s->private;
> - struct thermal_debugfs *thermal_dbg = tz->debugfs;
> + struct thermal_debugfs *thermal_dbg = s->private;
>
> mutex_unlock(&thermal_dbg->lock);
> }
>
> static int tze_seq_show(struct seq_file *s, void *v)
> {
> - struct thermal_zone_device *tz = s->private;
> + struct thermal_debugfs *thermal_dbg = s->private;
> + struct thermal_zone_device *tz = thermal_dbg->tz_dbg.tz;
> struct thermal_trip_desc *td;
> struct tz_episode *tze;
> const char *type;
> @@ -816,6 +816,8 @@ void thermal_debug_tz_add(struct thermal
>
> tz_dbg = &thermal_dbg->tz_dbg;
>
> + tz_dbg->tz = tz;
> +
> tz_dbg->trips_crossed = kzalloc(sizeof(int) * tz->num_trips, GFP_KERNEL);
> if (!tz_dbg->trips_crossed) {
> thermal_debugfs_remove_id(thermal_dbg);
> @@ -824,20 +826,30 @@ void thermal_debug_tz_add(struct thermal
>
> INIT_LIST_HEAD(&tz_dbg->tz_episodes);
>
> - debugfs_create_file("mitigations", 0400, thermal_dbg->d_top, tz, &tze_fops);
> + debugfs_create_file("mitigations", 0400, thermal_dbg->d_top,
> + thermal_dbg, &tze_fops);
>
> tz->debugfs = thermal_dbg;
> }
>
> void thermal_debug_tz_remove(struct thermal_zone_device *tz)
> {
> - struct thermal_debugfs *thermal_dbg = tz->debugfs;
> + struct thermal_debugfs *thermal_dbg;
> struct tz_episode *tze, *tmp;
> struct tz_debugfs *tz_dbg;
> int *trips_crossed;
>
> - if (!thermal_dbg)
> + mutex_lock(&tz->lock);
> +
> + thermal_dbg = tz->debugfs;
> + if (!thermal_dbg) {
> + mutex_unlock(&tz->lock);
> return;
> + }
> +
> + tz->debugfs = NULL;
> +
> + mutex_unlock(&tz->lock);
>
> tz_dbg = &thermal_dbg->tz_dbg;
>
> @@ -850,8 +862,6 @@ void thermal_debug_tz_remove(struct ther
> kfree(tze);
> }
>
> - tz->debugfs = NULL;
> -
> mutex_unlock(&thermal_dbg->lock);
>
> thermal_debugfs_remove_id(thermal_dbg);
>
>
>

LGTM, with that minor spelling fixed:

Reviewed-by: Lukasz Luba <[email protected]>