Platform drivers .remove callbacks are not supposed to fail and report
errors: such errors are indeed ignored by the core platform drivers stack
and the driver unbind process is anyway completed.
The SCMI core platform driver as it is now, instead, bails out reporting
an error in case of an explicit unbind request.
Fix the removal path by adding proper device links between the core SCMI
device and the SCMI protocol devices so as to cause a full SCMI stack
unbind when the core driver is removed: the remove process does not bail
out anymore on the anomalous conditions triggered by an explicit unbind
but the user is still warned.
Reported-by: Uwe Kleine-König <[email protected]>
Signed-off-by: Cristian Marussi <[email protected]>
---
drivers/firmware/arm_scmi/bus.c | 11 +++++++++++
drivers/firmware/arm_scmi/common.h | 1 +
drivers/firmware/arm_scmi/driver.c | 31 ++++++++++++++++++------------
3 files changed, 31 insertions(+), 12 deletions(-)
diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c
index d4e23101448a..35bb70724d44 100644
--- a/drivers/firmware/arm_scmi/bus.c
+++ b/drivers/firmware/arm_scmi/bus.c
@@ -216,9 +216,20 @@ void scmi_device_destroy(struct scmi_device *scmi_dev)
device_unregister(&scmi_dev->dev);
}
+void scmi_device_link_add(struct device *consumer, struct device *supplier)
+{
+ struct device_link *link;
+
+ link = device_link_add(consumer, supplier, DL_FLAG_AUTOREMOVE_CONSUMER);
+
+ WARN_ON(!link);
+}
+
void scmi_set_handle(struct scmi_device *scmi_dev)
{
scmi_dev->handle = scmi_handle_get(&scmi_dev->dev);
+ if (scmi_dev->handle)
+ scmi_device_link_add(&scmi_dev->dev, scmi_dev->handle->dev);
}
int scmi_protocol_register(const struct scmi_protocol *proto)
diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h
index 61aba7447c32..9b87b5b69535 100644
--- a/drivers/firmware/arm_scmi/common.h
+++ b/drivers/firmware/arm_scmi/common.h
@@ -97,6 +97,7 @@ static inline void unpack_scmi_header(u32 msg_hdr, struct scmi_msg_hdr *hdr)
struct scmi_revision_info *
scmi_revision_area_get(const struct scmi_protocol_handle *ph);
int scmi_handle_put(const struct scmi_handle *handle);
+void scmi_device_link_add(struct device *consumer, struct device *supplier);
struct scmi_handle *scmi_handle_get(struct device *dev);
void scmi_set_handle(struct scmi_device *scmi_dev);
void scmi_setup_protocol_implemented(const struct scmi_protocol_handle *ph,
diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index 609ebedee9cb..7e19b6055d75 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -2273,10 +2273,16 @@ int scmi_protocol_device_request(const struct scmi_device_id *id_table)
sdev = scmi_get_protocol_device(child, info,
id_table->protocol_id,
id_table->name);
- /* Set handle if not already set: device existed */
- if (sdev && !sdev->handle)
- sdev->handle =
- scmi_handle_get_from_info_unlocked(info);
+ if (sdev) {
+ /* Set handle if not already set: device existed */
+ if (!sdev->handle)
+ sdev->handle =
+ scmi_handle_get_from_info_unlocked(info);
+ /* Relink consumer and suppliers */
+ if (sdev->handle)
+ scmi_device_link_add(&sdev->dev,
+ sdev->handle->dev);
+ }
} else {
dev_err(info->dev,
"Failed. SCMI protocol %d not active.\n",
@@ -2475,20 +2481,17 @@ void scmi_free_channel(struct scmi_chan_info *cinfo, struct idr *idr, int id)
static int scmi_remove(struct platform_device *pdev)
{
- int ret = 0, id;
+ int ret, id;
struct scmi_info *info = platform_get_drvdata(pdev);
struct device_node *child;
mutex_lock(&scmi_list_mutex);
if (info->users)
- ret = -EBUSY;
- else
- list_del(&info->node);
+ dev_warn(&pdev->dev,
+ "Still active SCMI users will be forcibly unbound.\n");
+ list_del(&info->node);
mutex_unlock(&scmi_list_mutex);
- if (ret)
- return ret;
-
scmi_notification_exit(&info->handle);
mutex_lock(&info->protocols_mtx);
@@ -2500,7 +2503,11 @@ static int scmi_remove(struct platform_device *pdev)
idr_destroy(&info->active_protocols);
/* Safe to free channels since no more users */
- return scmi_cleanup_txrx_channels(info);
+ ret = scmi_cleanup_txrx_channels(info);
+ if (ret)
+ dev_warn(&pdev->dev, "Failed to cleanup SCMI channels.\n");
+
+ return 0;
}
static ssize_t protocol_version_show(struct device *dev,
--
2.34.1
Available sensors are enumerated and reported by the SCMI platform server
using a 16bit identification number; not all such sensors are of a type
supported by hwmon subsystem and, among the supported ones, only a subset
could be temperature sensors that have to be registered with the Thermal
Framework.
Potential clashes between hwmon channels indexes and the underlying real
sensors IDs do not play well with the hwmon<-->thermal bridge automatic
registration routines and could need a sensible number of fake dummy
sensors to be made up in order to keep indexes and IDs in sync.
Avoid to use the hwmon<-->thermal bridge dropping the HWMON_C_REGISTER_TZ
attribute and instead explicit register temperature sensors directly with
the Thermal Framework.
Cc: Daniel Lezcano <[email protected]>
Cc: Guenter Roeck <[email protected]>
Cc: [email protected]
Signed-off-by: Cristian Marussi <[email protected]>
---
drivers/hwmon/scmi-hwmon.c | 115 ++++++++++++++++++++++++++++++++-----
1 file changed, 102 insertions(+), 13 deletions(-)
diff --git a/drivers/hwmon/scmi-hwmon.c b/drivers/hwmon/scmi-hwmon.c
index b1329a58ce40..124fe8ee1b9b 100644
--- a/drivers/hwmon/scmi-hwmon.c
+++ b/drivers/hwmon/scmi-hwmon.c
@@ -20,6 +20,11 @@ struct scmi_sensors {
const struct scmi_sensor_info **info[hwmon_max];
};
+struct scmi_thermal_sensor {
+ const struct scmi_protocol_handle *ph;
+ const struct scmi_sensor_info *info;
+};
+
static inline u64 __pow10(u8 x)
{
u64 r = 1;
@@ -64,16 +69,14 @@ static int scmi_hwmon_scale(const struct scmi_sensor_info *sensor, u64 *value)
return 0;
}
-static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
- u32 attr, int channel, long *val)
+static int scmi_hwmon_read_scaled_value(const struct scmi_protocol_handle *ph,
+ const struct scmi_sensor_info *sensor,
+ long *val)
{
int ret;
u64 value;
- const struct scmi_sensor_info *sensor;
- struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev);
- sensor = *(scmi_sensors->info[type] + channel);
- ret = sensor_ops->reading_get(scmi_sensors->ph, sensor->id, &value);
+ ret = sensor_ops->reading_get(ph, sensor->id, &value);
if (ret)
return ret;
@@ -84,6 +87,17 @@ static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
return ret;
}
+static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ const struct scmi_sensor_info *sensor;
+ struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev);
+
+ sensor = *(scmi_sensors->info[type] + channel);
+
+ return scmi_hwmon_read_scaled_value(scmi_sensors->ph, sensor, val);
+}
+
static int
scmi_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, const char **str)
@@ -122,6 +136,25 @@ static struct hwmon_chip_info scmi_chip_info = {
.info = NULL,
};
+static int scmi_hwmon_thermal_get_temp(struct thermal_zone_device *tz,
+ int *temp)
+{
+ int ret;
+ long value;
+ struct scmi_thermal_sensor *th_sensor = tz->devdata;
+
+ ret = scmi_hwmon_read_scaled_value(th_sensor->ph, th_sensor->info,
+ &value);
+ if (!ret)
+ *temp = value;
+
+ return ret;
+}
+
+static const struct thermal_zone_device_ops scmi_hwmon_thermal_ops = {
+ .get_temp = scmi_hwmon_thermal_get_temp,
+};
+
static int scmi_hwmon_add_chan_info(struct hwmon_channel_info *scmi_hwmon_chan,
struct device *dev, int num,
enum hwmon_sensor_types type, u32 config)
@@ -149,7 +182,6 @@ static enum hwmon_sensor_types scmi_types[] = {
};
static u32 hwmon_attributes[hwmon_max] = {
- [hwmon_chip] = HWMON_C_REGISTER_TZ,
[hwmon_temp] = HWMON_T_INPUT | HWMON_T_LABEL,
[hwmon_in] = HWMON_I_INPUT | HWMON_I_LABEL,
[hwmon_curr] = HWMON_C_INPUT | HWMON_C_LABEL,
@@ -157,6 +189,43 @@ static u32 hwmon_attributes[hwmon_max] = {
[hwmon_energy] = HWMON_E_INPUT | HWMON_E_LABEL,
};
+static int scmi_thermal_sensor_register(struct device *dev,
+ const struct scmi_protocol_handle *ph,
+ const struct scmi_sensor_info *sensor)
+{
+ struct scmi_thermal_sensor *th_sensor;
+ struct thermal_zone_device *tzd;
+
+ th_sensor = devm_kzalloc(dev, sizeof(*th_sensor), GFP_KERNEL);
+ if (!th_sensor)
+ return -ENOMEM;
+
+ th_sensor->ph = ph;
+ th_sensor->info = sensor;
+
+ /*
+ * Try to register a temperature sensor with the Thermal Framework:
+ * skip sensors not defined as part of any thermal zone (-ENODEV) but
+ * report any other errors related to misconfigured zones/sensors.
+ */
+ tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
+ &scmi_hwmon_thermal_ops);
+ if (IS_ERR(tzd)) {
+ devm_kfree(dev, th_sensor);
+
+ if (PTR_ERR(tzd) != -ENODEV)
+ return PTR_ERR(tzd);
+
+ dev_info(dev, "Sensor '%s' not attached to any thermal zone.\n",
+ sensor->name);
+ } else {
+ dev_dbg(dev, "Sensor '%s' attached to thermal zone ID:%d\n",
+ sensor->name, tzd->id);
+ }
+
+ return 0;
+}
+
static int scmi_hwmon_probe(struct scmi_device *sdev)
{
int i, idx;
@@ -164,7 +233,7 @@ static int scmi_hwmon_probe(struct scmi_device *sdev)
enum hwmon_sensor_types type;
struct scmi_sensors *scmi_sensors;
const struct scmi_sensor_info *sensor;
- int nr_count[hwmon_max] = {0}, nr_types = 0;
+ int nr_count[hwmon_max] = {0}, nr_types = 0, nr_count_temp = 0;
const struct hwmon_chip_info *chip_info;
struct device *hwdev, *dev = &sdev->dev;
struct hwmon_channel_info *scmi_hwmon_chan;
@@ -208,10 +277,8 @@ static int scmi_hwmon_probe(struct scmi_device *sdev)
}
}
- if (nr_count[hwmon_temp]) {
- nr_count[hwmon_chip]++;
- nr_types++;
- }
+ if (nr_count[hwmon_temp])
+ nr_count_temp = nr_count[hwmon_temp];
scmi_hwmon_chan = devm_kcalloc(dev, nr_types, sizeof(*scmi_hwmon_chan),
GFP_KERNEL);
@@ -262,8 +329,30 @@ static int scmi_hwmon_probe(struct scmi_device *sdev)
hwdev = devm_hwmon_device_register_with_info(dev, "scmi_sensors",
scmi_sensors, chip_info,
NULL);
+ if (IS_ERR(hwdev))
+ return PTR_ERR(hwdev);
+
+ for (i = 0; i < nr_count_temp; i++) {
+ int ret;
- return PTR_ERR_OR_ZERO(hwdev);
+ sensor = *(scmi_sensors->info[hwmon_temp] + i);
+ if (!sensor)
+ continue;
+
+ /*
+ * Warn on any misconfiguration related to thermal zones but
+ * bail out of probing only on memory errors.
+ */
+ ret = scmi_thermal_sensor_register(dev, ph, sensor);
+ if (ret == -ENOMEM)
+ return ret;
+ else if (ret)
+ dev_warn(dev,
+ "Thermal zone misconfigured for %s. err=%d\n",
+ sensor->name, ret);
+ }
+
+ return 0;
}
static const struct scmi_device_id scmi_id_table[] = {
--
2.34.1
SCMI transports based on Shared Memory, at start of transmissions, have to
wait for the shared TX channel area to be eventually freed by the SCMI
server platform before accessing the channel: in fact the channel is owned
by the SCMI server platform until marked as free by the platform itself
and, as such, cannot be used by the agent until relinquished.
As a consequence a badly misbehaving SCMI server firmware could lock the
channel indefinitely and make the kernel side SCMI stack loop forever
waiting for such channel to be freed, possibly hanging the whole boot
sequence.
Add a timeout to the existent TX waiting spin-loop so that, when the
system ends up in this situation, the SCMI stack can at least bail-out,
nosily warn the user, and abort the transmission.
Reported-by: YaxiongTian <[email protected]>
Suggested-by: YaxiongTian <[email protected]>
Cc: Vincent Guittot <[email protected]>
Cc: Etienne Carriere <[email protected]>
Cc: Florian Fainelli <[email protected]>
Signed-off-by: Cristian Marussi <[email protected]>
---
drivers/firmware/arm_scmi/common.h | 4 +++-
drivers/firmware/arm_scmi/driver.c | 1 +
drivers/firmware/arm_scmi/mailbox.c | 2 +-
drivers/firmware/arm_scmi/optee.c | 2 +-
drivers/firmware/arm_scmi/shmem.c | 31 +++++++++++++++++++++++++----
drivers/firmware/arm_scmi/smc.c | 2 +-
6 files changed, 34 insertions(+), 8 deletions(-)
diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h
index 9b87b5b69535..a1c0154c31c6 100644
--- a/drivers/firmware/arm_scmi/common.h
+++ b/drivers/firmware/arm_scmi/common.h
@@ -118,6 +118,7 @@ void scmi_protocol_release(const struct scmi_handle *handle, u8 protocol_id);
*
* @dev: Reference to device in the SCMI hierarchy corresponding to this
* channel
+ * @rx_timeout_ms: The configured RX timeout in milliseconds.
* @handle: Pointer to SCMI entity handle
* @no_completion_irq: Flag to indicate that this channel has no completion
* interrupt mechanism for synchronous commands.
@@ -127,6 +128,7 @@ void scmi_protocol_release(const struct scmi_handle *handle, u8 protocol_id);
*/
struct scmi_chan_info {
struct device *dev;
+ unsigned int rx_timeout_ms;
struct scmi_handle *handle;
bool no_completion_irq;
void *transport_info;
@@ -233,7 +235,7 @@ void scmi_free_channel(struct scmi_chan_info *cinfo, struct idr *idr, int id);
struct scmi_shared_mem;
void shmem_tx_prepare(struct scmi_shared_mem __iomem *shmem,
- struct scmi_xfer *xfer);
+ struct scmi_xfer *xfer, struct scmi_chan_info *cinfo);
u32 shmem_read_header(struct scmi_shared_mem __iomem *shmem);
void shmem_fetch_response(struct scmi_shared_mem __iomem *shmem,
struct scmi_xfer *xfer);
diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index 94be633b55a0..985775f210f9 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -2013,6 +2013,7 @@ static int scmi_chan_setup(struct scmi_info *info, struct device *dev,
return -ENOMEM;
cinfo->dev = dev;
+ cinfo->rx_timeout_ms = info->desc->max_rx_timeout_ms;
ret = info->desc->ops->chan_setup(cinfo, info->dev, tx);
if (ret)
diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c
index 08ff4d110beb..1e40cb035044 100644
--- a/drivers/firmware/arm_scmi/mailbox.c
+++ b/drivers/firmware/arm_scmi/mailbox.c
@@ -36,7 +36,7 @@ static void tx_prepare(struct mbox_client *cl, void *m)
{
struct scmi_mailbox *smbox = client_to_scmi_mailbox(cl);
- shmem_tx_prepare(smbox->shmem, m);
+ shmem_tx_prepare(smbox->shmem, m, smbox->cinfo);
}
static void rx_callback(struct mbox_client *cl, void *m)
diff --git a/drivers/firmware/arm_scmi/optee.c b/drivers/firmware/arm_scmi/optee.c
index f42dad997ac9..2a7aeab40e54 100644
--- a/drivers/firmware/arm_scmi/optee.c
+++ b/drivers/firmware/arm_scmi/optee.c
@@ -498,7 +498,7 @@ static int scmi_optee_send_message(struct scmi_chan_info *cinfo,
msg_tx_prepare(channel->req.msg, xfer);
ret = invoke_process_msg_channel(channel, msg_command_size(xfer));
} else {
- shmem_tx_prepare(channel->req.shmem, xfer);
+ shmem_tx_prepare(channel->req.shmem, xfer, cinfo);
ret = invoke_process_smt_channel(channel);
}
diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c
index 0e3eaea5d852..1dfe534b8518 100644
--- a/drivers/firmware/arm_scmi/shmem.c
+++ b/drivers/firmware/arm_scmi/shmem.c
@@ -5,10 +5,13 @@
* Copyright (C) 2019 ARM Ltd.
*/
+#include <linux/ktime.h>
#include <linux/io.h>
#include <linux/processor.h>
#include <linux/types.h>
+#include <asm-generic/bug.h>
+
#include "common.h"
/*
@@ -30,16 +33,36 @@ struct scmi_shared_mem {
};
void shmem_tx_prepare(struct scmi_shared_mem __iomem *shmem,
- struct scmi_xfer *xfer)
+ struct scmi_xfer *xfer, struct scmi_chan_info *cinfo)
{
+ ktime_t stop;
+
/*
* Ideally channel must be free by now unless OS timeout last
* request and platform continued to process the same, wait
* until it releases the shared memory, otherwise we may endup
- * overwriting its response with new message payload or vice-versa
+ * overwriting its response with new message payload or vice-versa.
+ * Giving up anyway after twice the expected channel timeout so as
+ * not to bail-out on intermittent issues where the platform is
+ * occasionally a bit slower to answer.
+ *
+ * Note that after a timeout is detected we bail-out and carry on but
+ * the transport functionality is probably permanently compromised:
+ * this is just to ease debugging and avoid complete hangs on boot
+ * due to a misbehaving SCMI firmware.
*/
- spin_until_cond(ioread32(&shmem->channel_status) &
- SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE);
+ stop = ktime_add_ms(ktime_get(), 2 * cinfo->rx_timeout_ms);
+ spin_until_cond((ioread32(&shmem->channel_status) &
+ SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE) ||
+ ktime_after(ktime_get(), stop));
+ if (!(ioread32(&shmem->channel_status) &
+ SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE)) {
+ WARN_ON_ONCE(1);
+ dev_err(cinfo->dev,
+ "Timeout waiting for a free TX channel !\n");
+ return;
+ }
+
/* Mark channel busy + clear error */
iowrite32(0x0, &shmem->channel_status);
iowrite32(xfer->hdr.poll_completion ? 0 : SCMI_SHMEM_FLAG_INTR_ENABLED,
diff --git a/drivers/firmware/arm_scmi/smc.c b/drivers/firmware/arm_scmi/smc.c
index 745acfdd0b3d..87a7b13cf868 100644
--- a/drivers/firmware/arm_scmi/smc.c
+++ b/drivers/firmware/arm_scmi/smc.c
@@ -188,7 +188,7 @@ static int smc_send_message(struct scmi_chan_info *cinfo,
*/
smc_channel_lock_acquire(scmi_info, xfer);
- shmem_tx_prepare(scmi_info->shmem, xfer);
+ shmem_tx_prepare(scmi_info->shmem, xfer, cinfo);
arm_smccc_1_1_invoke(scmi_info->func_id, 0, 0, 0, 0, 0, 0, 0, &res);
--
2.34.1
SCMI Virtio transport device managed allocations must use the main
platform device in devres operations instead of the channel devices.
Cc: Peter Hilber <[email protected]>
Fixes: 46abe13b5e3d ("firmware: arm_scmi: Add virtio transport")
Signed-off-by: Cristian Marussi <[email protected]>
---
Since as of now (v6.1-rc2) cinfo->dev == info->dev in virtio (given
ONLY one channel can exist in virtio transport), this change is sort
of more semantic than real as of now, BUT it will be needed fully soon,
once the per-transport-devices will be adopted (part of an unrelated
series) since in that case cinfo->dev != info->dev for virtio too.
So maybe I can fold this one in the SCMI Raw series carrying transport
devices patch, if you prefer.
---
drivers/firmware/arm_scmi/virtio.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/firmware/arm_scmi/virtio.c b/drivers/firmware/arm_scmi/virtio.c
index 14709dbc96a1..36b7686843a4 100644
--- a/drivers/firmware/arm_scmi/virtio.c
+++ b/drivers/firmware/arm_scmi/virtio.c
@@ -444,12 +444,12 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
for (i = 0; i < vioch->max_msg; i++) {
struct scmi_vio_msg *msg;
- msg = devm_kzalloc(cinfo->dev, sizeof(*msg), GFP_KERNEL);
+ msg = devm_kzalloc(dev, sizeof(*msg), GFP_KERNEL);
if (!msg)
return -ENOMEM;
if (tx) {
- msg->request = devm_kzalloc(cinfo->dev,
+ msg->request = devm_kzalloc(dev,
VIRTIO_SCMI_MAX_PDU_SIZE,
GFP_KERNEL);
if (!msg->request)
@@ -458,7 +458,7 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
refcount_set(&msg->users, 1);
}
- msg->input = devm_kzalloc(cinfo->dev, VIRTIO_SCMI_MAX_PDU_SIZE,
+ msg->input = devm_kzalloc(dev, VIRTIO_SCMI_MAX_PDU_SIZE,
GFP_KERNEL);
if (!msg->input)
return -ENOMEM;
--
2.34.1
On Fri, Oct 28, 2022 at 08:11:59AM -0700, Guenter Roeck wrote:
> On 10/28/22 07:08, Cristian Marussi wrote:
> > Available sensors are enumerated and reported by the SCMI platform server
> > using a 16bit identification number; not all such sensors are of a type
> > supported by hwmon subsystem and, among the supported ones, only a subset
> > could be temperature sensors that have to be registered with the Thermal
> > Framework.
> > Potential clashes between hwmon channels indexes and the underlying real
> > sensors IDs do not play well with the hwmon<-->thermal bridge automatic
> > registration routines and could need a sensible number of fake dummy
> > sensors to be made up in order to keep indexes and IDs in sync.
> >
> > Avoid to use the hwmon<-->thermal bridge dropping the HWMON_C_REGISTER_TZ
> > attribute and instead explicit register temperature sensors directly with
> > the Thermal Framework.
> >
>
>
> For my reference:
>
> Reviewed-by: Guenter Roeck <[email protected]>
>
> $subject says "patch 7/8". Patches 1-6 are firmware patches. Does this patch
> depend on the other patches of the series or can I apply it on its own ?
Thanks for having a look first of all !
This patch can be applied on its own...it's just that I have bundled
together a bunch of fixes (... this being probably a bit too big really it
should have been on its own, sorry for that...)
>
> Additional comment inline below.
>
> Thanks,
> Guenter
>
> > Cc: Daniel Lezcano <[email protected]>
> > Cc: Guenter Roeck <[email protected]>
> > Cc: [email protected]
> > Signed-off-by: Cristian Marussi <[email protected]>
> > ---
> > drivers/hwmon/scmi-hwmon.c | 115 ++++++++++++++++++++++++++++++++-----
> > 1 file changed, 102 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/hwmon/scmi-hwmon.c b/drivers/hwmon/scmi-hwmon.c
> > index b1329a58ce40..124fe8ee1b9b 100644
> > --- a/drivers/hwmon/scmi-hwmon.c
> > +++ b/drivers/hwmon/scmi-hwmon.c
> > @@ -20,6 +20,11 @@ struct scmi_sensors {
> > const struct scmi_sensor_info **info[hwmon_max];
> > };
> > +struct scmi_thermal_sensor {
> > + const struct scmi_protocol_handle *ph;
> > + const struct scmi_sensor_info *info;
> > +};
> > +
> > static inline u64 __pow10(u8 x)
> > {
> > u64 r = 1;
> > @@ -64,16 +69,14 @@ static int scmi_hwmon_scale(const struct scmi_sensor_info *sensor, u64 *value)
> > return 0;
> > }
> > -static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> > - u32 attr, int channel, long *val)
> > +static int scmi_hwmon_read_scaled_value(const struct scmi_protocol_handle *ph,
> > + const struct scmi_sensor_info *sensor,
> > + long *val)
> > {
> > int ret;
> > u64 value;
> > - const struct scmi_sensor_info *sensor;
> > - struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev);
> > - sensor = *(scmi_sensors->info[type] + channel);
> > - ret = sensor_ops->reading_get(scmi_sensors->ph, sensor->id, &value);
> > + ret = sensor_ops->reading_get(ph, sensor->id, &value);
> > if (ret)
> > return ret;
> > @@ -84,6 +87,17 @@ static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> > return ret;
> > }
> > +static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> > + u32 attr, int channel, long *val)
> > +{
> > + const struct scmi_sensor_info *sensor;
> > + struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev);
> > +
> > + sensor = *(scmi_sensors->info[type] + channel);
> > +
> > + return scmi_hwmon_read_scaled_value(scmi_sensors->ph, sensor, val);
> > +}
> > +
> > static int
> > scmi_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type,
> > u32 attr, int channel, const char **str)
> > @@ -122,6 +136,25 @@ static struct hwmon_chip_info scmi_chip_info = {
> > .info = NULL,
> > };
> > +static int scmi_hwmon_thermal_get_temp(struct thermal_zone_device *tz,
> > + int *temp)
> > +{
> > + int ret;
> > + long value;
> > + struct scmi_thermal_sensor *th_sensor = tz->devdata;
> > +
> > + ret = scmi_hwmon_read_scaled_value(th_sensor->ph, th_sensor->info,
> > + &value);
> > + if (!ret)
> > + *temp = value;
> > +
> > + return ret;
> > +}
> > +
> > +static const struct thermal_zone_device_ops scmi_hwmon_thermal_ops = {
> > + .get_temp = scmi_hwmon_thermal_get_temp,
> > +};
> > +
> > static int scmi_hwmon_add_chan_info(struct hwmon_channel_info *scmi_hwmon_chan,
> > struct device *dev, int num,
> > enum hwmon_sensor_types type, u32 config)
> > @@ -149,7 +182,6 @@ static enum hwmon_sensor_types scmi_types[] = {
> > };
> > static u32 hwmon_attributes[hwmon_max] = {
> > - [hwmon_chip] = HWMON_C_REGISTER_TZ,
> > [hwmon_temp] = HWMON_T_INPUT | HWMON_T_LABEL,
> > [hwmon_in] = HWMON_I_INPUT | HWMON_I_LABEL,
> > [hwmon_curr] = HWMON_C_INPUT | HWMON_C_LABEL,
> > @@ -157,6 +189,43 @@ static u32 hwmon_attributes[hwmon_max] = {
> > [hwmon_energy] = HWMON_E_INPUT | HWMON_E_LABEL,
> > };
> > +static int scmi_thermal_sensor_register(struct device *dev,
> > + const struct scmi_protocol_handle *ph,
> > + const struct scmi_sensor_info *sensor)
> > +{
> > + struct scmi_thermal_sensor *th_sensor;
> > + struct thermal_zone_device *tzd;
> > +
> > + th_sensor = devm_kzalloc(dev, sizeof(*th_sensor), GFP_KERNEL);
> > + if (!th_sensor)
> > + return -ENOMEM;
> > +
> > + th_sensor->ph = ph;
> > + th_sensor->info = sensor;
> > +
> > + /*
> > + * Try to register a temperature sensor with the Thermal Framework:
> > + * skip sensors not defined as part of any thermal zone (-ENODEV) but
> > + * report any other errors related to misconfigured zones/sensors.
> > + */
> > + tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
> > + &scmi_hwmon_thermal_ops);
> > + if (IS_ERR(tzd)) {
> > + devm_kfree(dev, th_sensor);
> > +
> > + if (PTR_ERR(tzd) != -ENODEV)
> > + return PTR_ERR(tzd);
> > +
> > + dev_info(dev, "Sensor '%s' not attached to any thermal zone.\n",
> > + sensor->name);
>
> There were complaints about this message as it is noisy. If you send
> another version, please drop it unless attaching each sensor to a thermal
> zone is strongly expected. If you don't send another version, I'll drop it
> while applying.
>
Ok fine for me. I am waiting to have some feedback from Sudeep too, but
I do not have plan for another version as of now.
As a side note, though, I understand the 'noisiness' argument, but,
sincerely this same message in the original HWMON code was the only
reason why I spotted that something was wrong with the SCMI/HWMON
interactions and discovered the indexes/ids mismatch...if not for
that it would have gone un-noticed that a perfectly configured
ThermalZone/Sensor was not working properly...
(un-noticed at least until something would have been burnt to fire
in my house .. joking :P)
Thanks,
Cristian
On 10/28/22 07:08, Cristian Marussi wrote:
> Available sensors are enumerated and reported by the SCMI platform server
> using a 16bit identification number; not all such sensors are of a type
> supported by hwmon subsystem and, among the supported ones, only a subset
> could be temperature sensors that have to be registered with the Thermal
> Framework.
> Potential clashes between hwmon channels indexes and the underlying real
> sensors IDs do not play well with the hwmon<-->thermal bridge automatic
> registration routines and could need a sensible number of fake dummy
> sensors to be made up in order to keep indexes and IDs in sync.
>
> Avoid to use the hwmon<-->thermal bridge dropping the HWMON_C_REGISTER_TZ
> attribute and instead explicit register temperature sensors directly with
> the Thermal Framework.
>
For my reference:
Reviewed-by: Guenter Roeck <[email protected]>
$subject says "patch 7/8". Patches 1-6 are firmware patches. Does this patch
depend on the other patches of the series or can I apply it on its own ?
Additional comment inline below.
Thanks,
Guenter
> Cc: Daniel Lezcano <[email protected]>
> Cc: Guenter Roeck <[email protected]>
> Cc: [email protected]
> Signed-off-by: Cristian Marussi <[email protected]>
> ---
> drivers/hwmon/scmi-hwmon.c | 115 ++++++++++++++++++++++++++++++++-----
> 1 file changed, 102 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/hwmon/scmi-hwmon.c b/drivers/hwmon/scmi-hwmon.c
> index b1329a58ce40..124fe8ee1b9b 100644
> --- a/drivers/hwmon/scmi-hwmon.c
> +++ b/drivers/hwmon/scmi-hwmon.c
> @@ -20,6 +20,11 @@ struct scmi_sensors {
> const struct scmi_sensor_info **info[hwmon_max];
> };
>
> +struct scmi_thermal_sensor {
> + const struct scmi_protocol_handle *ph;
> + const struct scmi_sensor_info *info;
> +};
> +
> static inline u64 __pow10(u8 x)
> {
> u64 r = 1;
> @@ -64,16 +69,14 @@ static int scmi_hwmon_scale(const struct scmi_sensor_info *sensor, u64 *value)
> return 0;
> }
>
> -static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> - u32 attr, int channel, long *val)
> +static int scmi_hwmon_read_scaled_value(const struct scmi_protocol_handle *ph,
> + const struct scmi_sensor_info *sensor,
> + long *val)
> {
> int ret;
> u64 value;
> - const struct scmi_sensor_info *sensor;
> - struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev);
>
> - sensor = *(scmi_sensors->info[type] + channel);
> - ret = sensor_ops->reading_get(scmi_sensors->ph, sensor->id, &value);
> + ret = sensor_ops->reading_get(ph, sensor->id, &value);
> if (ret)
> return ret;
>
> @@ -84,6 +87,17 @@ static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> return ret;
> }
>
> +static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> + u32 attr, int channel, long *val)
> +{
> + const struct scmi_sensor_info *sensor;
> + struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev);
> +
> + sensor = *(scmi_sensors->info[type] + channel);
> +
> + return scmi_hwmon_read_scaled_value(scmi_sensors->ph, sensor, val);
> +}
> +
> static int
> scmi_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type,
> u32 attr, int channel, const char **str)
> @@ -122,6 +136,25 @@ static struct hwmon_chip_info scmi_chip_info = {
> .info = NULL,
> };
>
> +static int scmi_hwmon_thermal_get_temp(struct thermal_zone_device *tz,
> + int *temp)
> +{
> + int ret;
> + long value;
> + struct scmi_thermal_sensor *th_sensor = tz->devdata;
> +
> + ret = scmi_hwmon_read_scaled_value(th_sensor->ph, th_sensor->info,
> + &value);
> + if (!ret)
> + *temp = value;
> +
> + return ret;
> +}
> +
> +static const struct thermal_zone_device_ops scmi_hwmon_thermal_ops = {
> + .get_temp = scmi_hwmon_thermal_get_temp,
> +};
> +
> static int scmi_hwmon_add_chan_info(struct hwmon_channel_info *scmi_hwmon_chan,
> struct device *dev, int num,
> enum hwmon_sensor_types type, u32 config)
> @@ -149,7 +182,6 @@ static enum hwmon_sensor_types scmi_types[] = {
> };
>
> static u32 hwmon_attributes[hwmon_max] = {
> - [hwmon_chip] = HWMON_C_REGISTER_TZ,
> [hwmon_temp] = HWMON_T_INPUT | HWMON_T_LABEL,
> [hwmon_in] = HWMON_I_INPUT | HWMON_I_LABEL,
> [hwmon_curr] = HWMON_C_INPUT | HWMON_C_LABEL,
> @@ -157,6 +189,43 @@ static u32 hwmon_attributes[hwmon_max] = {
> [hwmon_energy] = HWMON_E_INPUT | HWMON_E_LABEL,
> };
>
> +static int scmi_thermal_sensor_register(struct device *dev,
> + const struct scmi_protocol_handle *ph,
> + const struct scmi_sensor_info *sensor)
> +{
> + struct scmi_thermal_sensor *th_sensor;
> + struct thermal_zone_device *tzd;
> +
> + th_sensor = devm_kzalloc(dev, sizeof(*th_sensor), GFP_KERNEL);
> + if (!th_sensor)
> + return -ENOMEM;
> +
> + th_sensor->ph = ph;
> + th_sensor->info = sensor;
> +
> + /*
> + * Try to register a temperature sensor with the Thermal Framework:
> + * skip sensors not defined as part of any thermal zone (-ENODEV) but
> + * report any other errors related to misconfigured zones/sensors.
> + */
> + tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
> + &scmi_hwmon_thermal_ops);
> + if (IS_ERR(tzd)) {
> + devm_kfree(dev, th_sensor);
> +
> + if (PTR_ERR(tzd) != -ENODEV)
> + return PTR_ERR(tzd);
> +
> + dev_info(dev, "Sensor '%s' not attached to any thermal zone.\n",
> + sensor->name);
There were complaints about this message as it is noisy. If you send
another version, please drop it unless attaching each sensor to a thermal
zone is strongly expected. If you don't send another version, I'll drop it
while applying.
> + } else {
> + dev_dbg(dev, "Sensor '%s' attached to thermal zone ID:%d\n",
> + sensor->name, tzd->id);
> + }
> +
> + return 0;
> +}
> +
> static int scmi_hwmon_probe(struct scmi_device *sdev)
> {
> int i, idx;
> @@ -164,7 +233,7 @@ static int scmi_hwmon_probe(struct scmi_device *sdev)
> enum hwmon_sensor_types type;
> struct scmi_sensors *scmi_sensors;
> const struct scmi_sensor_info *sensor;
> - int nr_count[hwmon_max] = {0}, nr_types = 0;
> + int nr_count[hwmon_max] = {0}, nr_types = 0, nr_count_temp = 0;
> const struct hwmon_chip_info *chip_info;
> struct device *hwdev, *dev = &sdev->dev;
> struct hwmon_channel_info *scmi_hwmon_chan;
> @@ -208,10 +277,8 @@ static int scmi_hwmon_probe(struct scmi_device *sdev)
> }
> }
>
> - if (nr_count[hwmon_temp]) {
> - nr_count[hwmon_chip]++;
> - nr_types++;
> - }
> + if (nr_count[hwmon_temp])
> + nr_count_temp = nr_count[hwmon_temp];
>
> scmi_hwmon_chan = devm_kcalloc(dev, nr_types, sizeof(*scmi_hwmon_chan),
> GFP_KERNEL);
> @@ -262,8 +329,30 @@ static int scmi_hwmon_probe(struct scmi_device *sdev)
> hwdev = devm_hwmon_device_register_with_info(dev, "scmi_sensors",
> scmi_sensors, chip_info,
> NULL);
> + if (IS_ERR(hwdev))
> + return PTR_ERR(hwdev);
> +
> + for (i = 0; i < nr_count_temp; i++) {
> + int ret;
>
> - return PTR_ERR_OR_ZERO(hwdev);
> + sensor = *(scmi_sensors->info[hwmon_temp] + i);
> + if (!sensor)
> + continue;
> +
> + /*
> + * Warn on any misconfiguration related to thermal zones but
> + * bail out of probing only on memory errors.
> + */
> + ret = scmi_thermal_sensor_register(dev, ph, sensor);
> + if (ret == -ENOMEM)
> + return ret;
> + else if (ret)
> + dev_warn(dev,
> + "Thermal zone misconfigured for %s. err=%d\n",
> + sensor->name, ret);
> + }
> +
> + return 0;
> }
>
> static const struct scmi_device_id scmi_id_table[] = {
On 10/28/22 08:35, Cristian Marussi wrote:
[ ... ]
>>> + /*
>>> + * Try to register a temperature sensor with the Thermal Framework:
>>> + * skip sensors not defined as part of any thermal zone (-ENODEV) but
>>> + * report any other errors related to misconfigured zones/sensors.
>>> + */
>>> + tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
>>> + &scmi_hwmon_thermal_ops);
>>> + if (IS_ERR(tzd)) {
>>> + devm_kfree(dev, th_sensor);
>>> +
>>> + if (PTR_ERR(tzd) != -ENODEV)
>>> + return PTR_ERR(tzd);
>>> +
>>> + dev_info(dev, "Sensor '%s' not attached to any thermal zone.\n",
>>> + sensor->name);
>>
>> There were complaints about this message as it is noisy. If you send
>> another version, please drop it unless attaching each sensor to a thermal
>> zone is strongly expected. If you don't send another version, I'll drop it
>> while applying.
>>
>
> Ok fine for me. I am waiting to have some feedback from Sudeep too, but
> I do not have plan for another version as of now.
>
> As a side note, though, I understand the 'noisiness' argument, but,
> sincerely this same message in the original HWMON code was the only
> reason why I spotted that something was wrong with the SCMI/HWMON
> interactions and discovered the indexes/ids mismatch...if not for
> that it would have gone un-noticed that a perfectly configured
> ThermalZone/Sensor was not working properly...
> (un-noticed at least until something would have been burnt to fire
> in my house .. joking :P)
>
Good point.
Did you ever check the returned error code ? Maybe we could use it to
distinguish "it is not attached to a thermal zone because it is not
associated with one" from "attaching to a thermal zone failed because
its configuration is bad/incomplete".
Thanks,
Guenter
On 10/28/22 09:15, Cristian Marussi wrote:
> On Fri, Oct 28, 2022 at 08:58:58AM -0700, Guenter Roeck wrote:
>> On 10/28/22 08:35, Cristian Marussi wrote:
>> [ ... ]
>>>>> + /*
>>>>> + * Try to register a temperature sensor with the Thermal Framework:
>>>>> + * skip sensors not defined as part of any thermal zone (-ENODEV) but
>>>>> + * report any other errors related to misconfigured zones/sensors.
>>>>> + */
>>>>> + tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
>>>>> + &scmi_hwmon_thermal_ops);
>>>>> + if (IS_ERR(tzd)) {
>>>>> + devm_kfree(dev, th_sensor);
>>>>> +
>>>>> + if (PTR_ERR(tzd) != -ENODEV)
>>>>> + return PTR_ERR(tzd);
>>>>> +
>>>>> + dev_info(dev, "Sensor '%s' not attached to any thermal zone.\n",
>>>>> + sensor->name);
>>>>
>>>> There were complaints about this message as it is noisy. If you send
>>>> another version, please drop it unless attaching each sensor to a thermal
>>>> zone is strongly expected. If you don't send another version, I'll drop it
>>>> while applying.
>>>>
>>>
>>> Ok fine for me. I am waiting to have some feedback from Sudeep too, but
>>> I do not have plan for another version as of now.
>>>
>>> As a side note, though, I understand the 'noisiness' argument, but,
>>> sincerely this same message in the original HWMON code was the only
>>> reason why I spotted that something was wrong with the SCMI/HWMON
>>> interactions and discovered the indexes/ids mismatch...if not for
>>> that it would have gone un-noticed that a perfectly configured
>>> ThermalZone/Sensor was not working properly...
>>> (un-noticed at least until something would have been burnt to fire
>>> in my house .. joking :P)
>>>
>>
>> Good point.
>>
>> Did you ever check the returned error code ? Maybe we could use it to
>> distinguish "it is not attached to a thermal zone because it is not
>> associated with one" from "attaching to a thermal zone failed because
>> its configuration is bad/incomplete".
>>
>
> Yes, it is what I do already indeed, in this regards I mimicked what
> the hwmon-thermal bridge was doing.
>
> In scmi_thermal_sensor_register() this message is printed out only
> if Thermal registration returned -ENODEV and no err is reported
> (which means teh specified sensor was not found attached to any TZ),
> while in the caller of scmi_thermal_sensor_register() for any error
> returned but -ENOMEM I print:
>
> "Thermal zone misconfigured for %s. err=%d\n",
>
> since any error reported by Thermal other than ENODEV and ENOMEM
> means the DT parsing unveiled some configuration anomaly.
>
Ok, then let's hope that this finds misconfigurations and drop the
info message.
I just noticed another problem in your code:
+ if (ret == -ENOMEM)
+ return ret;
+ else if (ret)
+ dev_warn(dev,
+ "Thermal zone misconfigured for %s. err=%d\n",
+ sensor->name, ret);
Static analyzers will rightfully notice that else after return is unnecessary.
Please rewrite and drop the else. I think something like
if (ret) {
if (ret == -ENOMEM)
return ret;
dev_warn(dev,
"Thermal zone misconfigured for %s. err=%d\n",
sensor->name, ret);
}
would be better since ret would only be evaluated once in the no-error case.
Thanks,
Guenter
On Fri, Oct 28, 2022 at 08:58:58AM -0700, Guenter Roeck wrote:
> On 10/28/22 08:35, Cristian Marussi wrote:
> [ ... ]
> > > > + /*
> > > > + * Try to register a temperature sensor with the Thermal Framework:
> > > > + * skip sensors not defined as part of any thermal zone (-ENODEV) but
> > > > + * report any other errors related to misconfigured zones/sensors.
> > > > + */
> > > > + tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
> > > > + &scmi_hwmon_thermal_ops);
> > > > + if (IS_ERR(tzd)) {
> > > > + devm_kfree(dev, th_sensor);
> > > > +
> > > > + if (PTR_ERR(tzd) != -ENODEV)
> > > > + return PTR_ERR(tzd);
> > > > +
> > > > + dev_info(dev, "Sensor '%s' not attached to any thermal zone.\n",
> > > > + sensor->name);
> > >
> > > There were complaints about this message as it is noisy. If you send
> > > another version, please drop it unless attaching each sensor to a thermal
> > > zone is strongly expected. If you don't send another version, I'll drop it
> > > while applying.
> > >
> >
> > Ok fine for me. I am waiting to have some feedback from Sudeep too, but
> > I do not have plan for another version as of now.
> >
> > As a side note, though, I understand the 'noisiness' argument, but,
> > sincerely this same message in the original HWMON code was the only
> > reason why I spotted that something was wrong with the SCMI/HWMON
> > interactions and discovered the indexes/ids mismatch...if not for
> > that it would have gone un-noticed that a perfectly configured
> > ThermalZone/Sensor was not working properly...
> > (un-noticed at least until something would have been burnt to fire
> > in my house .. joking :P)
> >
>
> Good point.
>
> Did you ever check the returned error code ? Maybe we could use it to
> distinguish "it is not attached to a thermal zone because it is not
> associated with one" from "attaching to a thermal zone failed because
> its configuration is bad/incomplete".
>
Yes, it is what I do already indeed, in this regards I mimicked what
the hwmon-thermal bridge was doing.
In scmi_thermal_sensor_register() this message is printed out only
if Thermal registration returned -ENODEV and no err is reported
(which means teh specified sensor was not found attached to any TZ),
while in the caller of scmi_thermal_sensor_register() for any error
returned but -ENOMEM I print:
"Thermal zone misconfigured for %s. err=%d\n",
since any error reported by Thermal other than ENODEV and ENOMEM
means the DT parsing unveiled some configuration anomaly.
Thanks,
Cristian
On Fri, Oct 28, 2022 at 09:34:05AM -0700, Guenter Roeck wrote:
> On 10/28/22 09:15, Cristian Marussi wrote:
> > On Fri, Oct 28, 2022 at 08:58:58AM -0700, Guenter Roeck wrote:
> > > On 10/28/22 08:35, Cristian Marussi wrote:
> > > [ ... ]
> > > > > > + /*
> > > > > > + * Try to register a temperature sensor with the Thermal Framework:
> > > > > > + * skip sensors not defined as part of any thermal zone (-ENODEV) but
> > > > > > + * report any other errors related to misconfigured zones/sensors.
> > > > > > + */
> > > > > > + tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
> > > > > > + &scmi_hwmon_thermal_ops);
> > > > > > + if (IS_ERR(tzd)) {
> > > > > > + devm_kfree(dev, th_sensor);
> > > > > > +
> > > > > > + if (PTR_ERR(tzd) != -ENODEV)
> > > > > > + return PTR_ERR(tzd);
> > > > > > +
> > > > > > + dev_info(dev, "Sensor '%s' not attached to any thermal zone.\n",
> > > > > > + sensor->name);
> > > > >
> > > > > There were complaints about this message as it is noisy. If you send
> > > > > another version, please drop it unless attaching each sensor to a thermal
> > > > > zone is strongly expected. If you don't send another version, I'll drop it
> > > > > while applying.
> > > > >
> > > >
> > > > Ok fine for me. I am waiting to have some feedback from Sudeep too, but
> > > > I do not have plan for another version as of now.
> > > >
> > > > As a side note, though, I understand the 'noisiness' argument, but,
> > > > sincerely this same message in the original HWMON code was the only
> > > > reason why I spotted that something was wrong with the SCMI/HWMON
> > > > interactions and discovered the indexes/ids mismatch...if not for
> > > > that it would have gone un-noticed that a perfectly configured
> > > > ThermalZone/Sensor was not working properly...
> > > > (un-noticed at least until something would have been burnt to fire
> > > > in my house .. joking :P)
> > > >
> > >
> > > Good point.
> > >
> > > Did you ever check the returned error code ? Maybe we could use it to
> > > distinguish "it is not attached to a thermal zone because it is not
> > > associated with one" from "attaching to a thermal zone failed because
> > > its configuration is bad/incomplete".
> > >
> >
> > Yes, it is what I do already indeed, in this regards I mimicked what
> > the hwmon-thermal bridge was doing.
> >
> > In scmi_thermal_sensor_register() this message is printed out only
> > if Thermal registration returned -ENODEV and no err is reported
> > (which means teh specified sensor was not found attached to any TZ),
> > while in the caller of scmi_thermal_sensor_register() for any error
> > returned but -ENOMEM I print:
> >
> > "Thermal zone misconfigured for %s. err=%d\n",
> >
> > since any error reported by Thermal other than ENODEV and ENOMEM
> > means the DT parsing unveiled some configuration anomaly.
> >
>
> Ok, then let's hope that this finds misconfigurations and drop the
> info message.
The mismatch of indexes at hand won't be catched being reported by
Thermal as misconfig but just as not found ENODEV.
Anyway it is fine for me to drop the message.
>
> I just noticed another problem in your code:
>
> + if (ret == -ENOMEM)
> + return ret;
> + else if (ret)
> + dev_warn(dev,
> + "Thermal zone misconfigured for %s. err=%d\n",
> + sensor->name, ret);
>
> Static analyzers will rightfully notice that else after return is unnecessary.
> Please rewrite and drop the else. I think something like
>
Ah yes...my bad.
> if (ret) {
> if (ret == -ENOMEM)
> return ret;
> dev_warn(dev,
> "Thermal zone misconfigured for %s. err=%d\n",
> sensor->name, ret);
> }
>
> would be better since ret would only be evaluated once in the no-error case.
>
I'll resend this one with the fix and the dropped message.
Thanks,
Cristian
Available sensors are enumerated and reported by the SCMI platform server
using a 16bit identification number; not all such sensors are of a type
supported by hwmon subsystem and, among the supported ones, only a subset
could be temperature sensors that have to be registered with the Thermal
Framework.
Potential clashes between hwmon channels indexes and the underlying real
sensors IDs do not play well with the hwmon<-->thermal bridge automatic
registration routines and could need a sensible number of fake dummy
sensors to be made up in order to keep indexes and IDs in sync.
Avoid to use the hwmon<-->thermal bridge dropping the HWMON_C_REGISTER_TZ
attribute and instead explicit register temperature sensors directly with
the Thermal Framework.
Cc: Daniel Lezcano <[email protected]>
Cc: Guenter Roeck <[email protected]>
Cc: [email protected]
Signed-off-by: Cristian Marussi <[email protected]>
---
v1 --> v2
- refactor if conditions on thermal sensor registration
- changed dev_info to dev_dbg for message about sensors not attached
to any thermal zone
---
drivers/hwmon/scmi-hwmon.c | 116 ++++++++++++++++++++++++++++++++-----
1 file changed, 103 insertions(+), 13 deletions(-)
diff --git a/drivers/hwmon/scmi-hwmon.c b/drivers/hwmon/scmi-hwmon.c
index b1329a58ce40..e192f0c67146 100644
--- a/drivers/hwmon/scmi-hwmon.c
+++ b/drivers/hwmon/scmi-hwmon.c
@@ -20,6 +20,11 @@ struct scmi_sensors {
const struct scmi_sensor_info **info[hwmon_max];
};
+struct scmi_thermal_sensor {
+ const struct scmi_protocol_handle *ph;
+ const struct scmi_sensor_info *info;
+};
+
static inline u64 __pow10(u8 x)
{
u64 r = 1;
@@ -64,16 +69,14 @@ static int scmi_hwmon_scale(const struct scmi_sensor_info *sensor, u64 *value)
return 0;
}
-static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
- u32 attr, int channel, long *val)
+static int scmi_hwmon_read_scaled_value(const struct scmi_protocol_handle *ph,
+ const struct scmi_sensor_info *sensor,
+ long *val)
{
int ret;
u64 value;
- const struct scmi_sensor_info *sensor;
- struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev);
- sensor = *(scmi_sensors->info[type] + channel);
- ret = sensor_ops->reading_get(scmi_sensors->ph, sensor->id, &value);
+ ret = sensor_ops->reading_get(ph, sensor->id, &value);
if (ret)
return ret;
@@ -84,6 +87,17 @@ static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
return ret;
}
+static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ const struct scmi_sensor_info *sensor;
+ struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev);
+
+ sensor = *(scmi_sensors->info[type] + channel);
+
+ return scmi_hwmon_read_scaled_value(scmi_sensors->ph, sensor, val);
+}
+
static int
scmi_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, const char **str)
@@ -122,6 +136,25 @@ static struct hwmon_chip_info scmi_chip_info = {
.info = NULL,
};
+static int scmi_hwmon_thermal_get_temp(struct thermal_zone_device *tz,
+ int *temp)
+{
+ int ret;
+ long value;
+ struct scmi_thermal_sensor *th_sensor = tz->devdata;
+
+ ret = scmi_hwmon_read_scaled_value(th_sensor->ph, th_sensor->info,
+ &value);
+ if (!ret)
+ *temp = value;
+
+ return ret;
+}
+
+static const struct thermal_zone_device_ops scmi_hwmon_thermal_ops = {
+ .get_temp = scmi_hwmon_thermal_get_temp,
+};
+
static int scmi_hwmon_add_chan_info(struct hwmon_channel_info *scmi_hwmon_chan,
struct device *dev, int num,
enum hwmon_sensor_types type, u32 config)
@@ -149,7 +182,6 @@ static enum hwmon_sensor_types scmi_types[] = {
};
static u32 hwmon_attributes[hwmon_max] = {
- [hwmon_chip] = HWMON_C_REGISTER_TZ,
[hwmon_temp] = HWMON_T_INPUT | HWMON_T_LABEL,
[hwmon_in] = HWMON_I_INPUT | HWMON_I_LABEL,
[hwmon_curr] = HWMON_C_INPUT | HWMON_C_LABEL,
@@ -157,6 +189,43 @@ static u32 hwmon_attributes[hwmon_max] = {
[hwmon_energy] = HWMON_E_INPUT | HWMON_E_LABEL,
};
+static int scmi_thermal_sensor_register(struct device *dev,
+ const struct scmi_protocol_handle *ph,
+ const struct scmi_sensor_info *sensor)
+{
+ struct scmi_thermal_sensor *th_sensor;
+ struct thermal_zone_device *tzd;
+
+ th_sensor = devm_kzalloc(dev, sizeof(*th_sensor), GFP_KERNEL);
+ if (!th_sensor)
+ return -ENOMEM;
+
+ th_sensor->ph = ph;
+ th_sensor->info = sensor;
+
+ /*
+ * Try to register a temperature sensor with the Thermal Framework:
+ * skip sensors not defined as part of any thermal zone (-ENODEV) but
+ * report any other errors related to misconfigured zones/sensors.
+ */
+ tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
+ &scmi_hwmon_thermal_ops);
+ if (IS_ERR(tzd)) {
+ devm_kfree(dev, th_sensor);
+
+ if (PTR_ERR(tzd) != -ENODEV)
+ return PTR_ERR(tzd);
+
+ dev_dbg(dev, "Sensor '%s' not attached to any thermal zone.\n",
+ sensor->name);
+ } else {
+ dev_dbg(dev, "Sensor '%s' attached to thermal zone ID:%d\n",
+ sensor->name, tzd->id);
+ }
+
+ return 0;
+}
+
static int scmi_hwmon_probe(struct scmi_device *sdev)
{
int i, idx;
@@ -164,7 +233,7 @@ static int scmi_hwmon_probe(struct scmi_device *sdev)
enum hwmon_sensor_types type;
struct scmi_sensors *scmi_sensors;
const struct scmi_sensor_info *sensor;
- int nr_count[hwmon_max] = {0}, nr_types = 0;
+ int nr_count[hwmon_max] = {0}, nr_types = 0, nr_count_temp = 0;
const struct hwmon_chip_info *chip_info;
struct device *hwdev, *dev = &sdev->dev;
struct hwmon_channel_info *scmi_hwmon_chan;
@@ -208,10 +277,8 @@ static int scmi_hwmon_probe(struct scmi_device *sdev)
}
}
- if (nr_count[hwmon_temp]) {
- nr_count[hwmon_chip]++;
- nr_types++;
- }
+ if (nr_count[hwmon_temp])
+ nr_count_temp = nr_count[hwmon_temp];
scmi_hwmon_chan = devm_kcalloc(dev, nr_types, sizeof(*scmi_hwmon_chan),
GFP_KERNEL);
@@ -262,8 +329,31 @@ static int scmi_hwmon_probe(struct scmi_device *sdev)
hwdev = devm_hwmon_device_register_with_info(dev, "scmi_sensors",
scmi_sensors, chip_info,
NULL);
+ if (IS_ERR(hwdev))
+ return PTR_ERR(hwdev);
- return PTR_ERR_OR_ZERO(hwdev);
+ for (i = 0; i < nr_count_temp; i++) {
+ int ret;
+
+ sensor = *(scmi_sensors->info[hwmon_temp] + i);
+ if (!sensor)
+ continue;
+
+ /*
+ * Warn on any misconfiguration related to thermal zones but
+ * bail out of probing only on memory errors.
+ */
+ ret = scmi_thermal_sensor_register(dev, ph, sensor);
+ if (ret) {
+ if (ret == -ENOMEM)
+ return ret;
+ dev_warn(dev,
+ "Thermal zone misconfigured for %s. err=%d\n",
+ sensor->name, ret);
+ }
+ }
+
+ return 0;
}
static const struct scmi_device_id scmi_id_table[] = {
--
2.34.1
On Mon, Oct 31, 2022 at 11:40:18AM +0000, Cristian Marussi wrote:
> Available sensors are enumerated and reported by the SCMI platform server
> using a 16bit identification number; not all such sensors are of a type
> supported by hwmon subsystem and, among the supported ones, only a subset
> could be temperature sensors that have to be registered with the Thermal
> Framework.
> Potential clashes between hwmon channels indexes and the underlying real
> sensors IDs do not play well with the hwmon<-->thermal bridge automatic
> registration routines and could need a sensible number of fake dummy
> sensors to be made up in order to keep indexes and IDs in sync.
>
> Avoid to use the hwmon<-->thermal bridge dropping the HWMON_C_REGISTER_TZ
> attribute and instead explicit register temperature sensors directly with
> the Thermal Framework.
>
Hi Guenter,
FWIW from scmi perspective,
Acked-by: Sudeep Holla <[email protected]>
I was about to ask for your ack to pickup myself but I see there is no
strict dependency for that. Not sure if you want to take this as fix for
v6.1 as the thermal changes broke the existing support in SCMI hwmon
or do you still prefer v6.2 ?
--
Regards,
Sudeep
On Mon, Oct 31, 2022 at 07:00:21AM -0700, Guenter Roeck wrote:
> On Mon, Oct 31, 2022 at 01:25:23PM +0000, Sudeep Holla wrote:
> > On Mon, Oct 31, 2022 at 11:40:18AM +0000, Cristian Marussi wrote:
> > > Available sensors are enumerated and reported by the SCMI platform server
> > > using a 16bit identification number; not all such sensors are of a type
> > > supported by hwmon subsystem and, among the supported ones, only a subset
> > > could be temperature sensors that have to be registered with the Thermal
> > > Framework.
> > > Potential clashes between hwmon channels indexes and the underlying real
> > > sensors IDs do not play well with the hwmon<-->thermal bridge automatic
> > > registration routines and could need a sensible number of fake dummy
> > > sensors to be made up in order to keep indexes and IDs in sync.
> > >
> > > Avoid to use the hwmon<-->thermal bridge dropping the HWMON_C_REGISTER_TZ
> > > attribute and instead explicit register temperature sensors directly with
> > > the Thermal Framework.
> > >
> >
> > Hi Guenter,
> >
> > FWIW from scmi perspective,
> >
> > Acked-by: Sudeep Holla <[email protected]>
> >
> > I was about to ask for your ack to pickup myself but I see there is no
> > strict dependency for that. Not sure if you want to take this as fix for
> > v6.1 as the thermal changes broke the existing support in SCMI hwmon
> > or do you still prefer v6.2 ?
> >
>
> It is a regression and should be applied to 6.1. I'll pick it up and send
> a pull request to Linus later this week.
Indeed, thanks for the confirmation.
--
Regards,
Sudeep
On Mon, Oct 31, 2022 at 01:25:23PM +0000, Sudeep Holla wrote:
> On Mon, Oct 31, 2022 at 11:40:18AM +0000, Cristian Marussi wrote:
> > Available sensors are enumerated and reported by the SCMI platform server
> > using a 16bit identification number; not all such sensors are of a type
> > supported by hwmon subsystem and, among the supported ones, only a subset
> > could be temperature sensors that have to be registered with the Thermal
> > Framework.
> > Potential clashes between hwmon channels indexes and the underlying real
> > sensors IDs do not play well with the hwmon<-->thermal bridge automatic
> > registration routines and could need a sensible number of fake dummy
> > sensors to be made up in order to keep indexes and IDs in sync.
> >
> > Avoid to use the hwmon<-->thermal bridge dropping the HWMON_C_REGISTER_TZ
> > attribute and instead explicit register temperature sensors directly with
> > the Thermal Framework.
> >
>
> Hi Guenter,
>
> FWIW from scmi perspective,
>
> Acked-by: Sudeep Holla <[email protected]>
>
> I was about to ask for your ack to pickup myself but I see there is no
> strict dependency for that. Not sure if you want to take this as fix for
> v6.1 as the thermal changes broke the existing support in SCMI hwmon
> or do you still prefer v6.2 ?
>
It is a regression and should be applied to 6.1. I'll pick it up and send
a pull request to Linus later this week.
Thanks,
Guenter
On Fri, 28 Oct 2022 15:08:26 +0100, Cristian Marussi wrote:
> Platform drivers .remove callbacks are not supposed to fail and report
> errors: such errors are indeed ignored by the core platform drivers stack
> and the driver unbind process is anyway completed.
>
> The SCMI core platform driver as it is now, instead, bails out reporting
> an error in case of an explicit unbind request.
>
> [...]
Applied 1-6/8 to sudeep.holla/linux (for-next/scmi), thanks!
[1/8] firmware: arm_scmi: Cleanup core driver removal callback
https://git.kernel.org/sudeep.holla/c/3f4071cbd206
[2/8] firmware: arm_scmi: Suppress bind attributes
https://git.kernel.org/sudeep.holla/c/fd96fbc8fad3
[3/8] firmware: arm_scmi: Make tx_prepare time out eventually
https://git.kernel.org/sudeep.holla/c/59172b212ec0
[4/8] firmware: arm_scmi: Make RX chan_setup fail on memory errors
https://git.kernel.org/sudeep.holla/c/be9ba1f7f9e0
[5/8] firmware: arm_scmi: Fix devres allocation device in virtio
https://git.kernel.org/sudeep.holla/c/5ffc1c4cb896
[6/8] firmware: arm_scmi: Fix deferred_tx_wq release on error paths
https://git.kernel.org/sudeep.holla/c/1eff6929aff5
--
Regards,
Sudeep