Since for some QCoM tsens controllers, its suggested to
monitor the controller health periodically and in case an
issue is detected, to re-initialize the tsens controller
via trustzone, add the support for the same in the
qcom tsens driver.
Note that Once the tsens controller is reset using scm call,
all SROT and TM region registers will enter the reset mode.
While all the SROT registers will be re-programmed and
re-enabled in trustzone prior to the scm call exit, the TM
region registers will not re-initialized in trustzone and thus
need to be handled by the tsens driver.
Cc: Amit Kucheria <[email protected]>
Cc: Thara Gopinath <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Bhupesh Sharma <[email protected]>
---
drivers/thermal/qcom/tsens-v2.c | 3 +
drivers/thermal/qcom/tsens.c | 237 +++++++++++++++++++++++++++++++-
drivers/thermal/qcom/tsens.h | 6 +
3 files changed, 239 insertions(+), 7 deletions(-)
diff --git a/drivers/thermal/qcom/tsens-v2.c b/drivers/thermal/qcom/tsens-v2.c
index 61d38a56d29a..9bb542f16482 100644
--- a/drivers/thermal/qcom/tsens-v2.c
+++ b/drivers/thermal/qcom/tsens-v2.c
@@ -88,6 +88,9 @@ static const struct reg_field tsens_v2_regfields[MAX_REGFIELDS] = {
/* TRDY: 1=ready, 0=in progress */
[TRDY] = REG_FIELD(TM_TRDY_OFF, 0, 0),
+
+ /* FIRST_ROUND_COMPLETE: 1=complete, 0=not complete */
+ [FIRST_ROUND_COMPLETE] = REG_FIELD(TM_TRDY_OFF, 3, 3),
};
static const struct tsens_ops ops_generic_v2 = {
diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
index 97f4d4454f20..28d42ae0eb47 100644
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -7,6 +7,7 @@
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/io.h>
+#include <linux/qcom_scm.h>
#include <linux/module.h>
#include <linux/nvmem-consumer.h>
#include <linux/of.h>
@@ -21,6 +22,8 @@
#include "../thermal_hwmon.h"
#include "tsens.h"
+LIST_HEAD(tsens_device_list);
+
/**
* struct tsens_irq_data - IRQ status and temperature violations
* @up_viol: upper threshold violated
@@ -594,19 +597,159 @@ static void tsens_disable_irq(struct tsens_priv *priv)
regmap_field_write(priv->rf[INT_EN], 0);
}
+static int tsens_reenable_hw_after_scm(struct tsens_priv *priv)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->ul_lock, flags);
+
+ /* Re-enable watchdog, unmask the bark and
+ * disable cycle completion monitoring.
+ */
+ regmap_field_write(priv->rf[WDOG_BARK_CLEAR], 1);
+ regmap_field_write(priv->rf[WDOG_BARK_CLEAR], 0);
+ regmap_field_write(priv->rf[WDOG_BARK_MASK], 0);
+ regmap_field_write(priv->rf[CC_MON_MASK], 1);
+
+ /* Re-enable interrupts */
+ tsens_enable_irq(priv);
+
+ spin_unlock_irqrestore(&priv->ul_lock, flags);
+
+ return 0;
+}
+
int get_temp_tsens_valid(const struct tsens_sensor *s, int *temp)
{
- struct tsens_priv *priv = s->priv;
+ struct tsens_priv *priv = s->priv, *priv_reinit;
int hw_id = s->hw_id;
u32 temp_idx = LAST_TEMP_0 + hw_id;
u32 valid_idx = VALID_0 + hw_id;
u32 valid;
- int ret;
+ int ret, trdy, first_round, tsens_ret, sw_reg;
+ unsigned long timeout;
+ static atomic_t in_tsens_reinit;
/* VER_0 doesn't have VALID bit */
if (tsens_version(priv) == VER_0)
goto get_temp;
+ /* For some tsens controllers, its suggested to
+ * monitor the controller health periodically
+ * and in case an issue is detected to reinit
+ * tsens controller via trustzone.
+ */
+ if (priv->needs_reinit_wa) {
+ /* First check if TRDY is SET */
+ timeout = jiffies + usecs_to_jiffies(TIMEOUT_US);
+ do {
+ ret = regmap_field_read(priv->rf[TRDY], &trdy);
+ if (ret)
+ goto err;
+ if (!trdy)
+ continue;
+ } while (time_before(jiffies, timeout));
+
+ if (!trdy) {
+ ret = regmap_field_read(priv->rf[FIRST_ROUND_COMPLETE], &first_round);
+ if (ret)
+ goto err;
+
+ if (!first_round) {
+ if (atomic_read(&in_tsens_reinit)) {
+ dev_dbg(priv->dev, "tsens re-init is in progress\n");
+ ret = -EAGAIN;
+ goto err;
+ }
+
+ /* Wait for 2 ms for tsens controller to recover */
+ timeout = jiffies + msecs_to_jiffies(RESET_TIMEOUT_MS);
+ do {
+ ret = regmap_field_read(priv->rf[FIRST_ROUND_COMPLETE],
+ &first_round);
+ if (ret)
+ goto err;
+
+ if (first_round) {
+ dev_dbg(priv->dev, "tsens controller recovered\n");
+ goto sensor_read;
+ }
+ } while (time_before(jiffies, timeout));
+
+ /*
+ * tsens controller did not recover,
+ * proceed with SCM call to re-init it
+ */
+ if (atomic_read(&in_tsens_reinit)) {
+ dev_dbg(priv->dev, "tsens re-init is in progress\n");
+ ret = -EAGAIN;
+ goto err;
+ }
+
+ atomic_set(&in_tsens_reinit, 1);
+
+ /*
+ * Invoke scm call only if SW register write is
+ * reflecting in controller. Try it for 2 ms.
+ */
+ timeout = jiffies + msecs_to_jiffies(RESET_TIMEOUT_MS);
+ do {
+ ret = regmap_field_write(priv->rf[INT_EN], BIT(2));
+ if (ret)
+ goto err_unset;
+
+ ret = regmap_field_read(priv->rf[INT_EN], &sw_reg);
+ if (ret)
+ goto err_unset;
+
+ if (!(sw_reg & BIT(2)))
+ continue;
+ } while (time_before(jiffies, timeout));
+
+ if (!(sw_reg & BIT(2))) {
+ ret = -ENOTRECOVERABLE;
+ goto err_unset;
+ }
+
+ ret = qcom_scm_tsens_reinit(&tsens_ret);
+ if (ret || tsens_ret) {
+ dev_err(priv->dev, "tsens reinit scm call failed (%d : %d)\n",
+ ret, tsens_ret);
+ if (tsens_ret)
+ ret = -ENOTRECOVERABLE;
+
+ goto err_unset;
+ }
+
+ /* After the SCM call, we need to re-enable
+ * the interrupts and also set active threshold
+ * for each sensor.
+ */
+ list_for_each_entry(priv_reinit,
+ &tsens_device_list, list) {
+ ret = tsens_reenable_hw_after_scm(priv_reinit);
+ if (ret) {
+ dev_err(priv->dev,
+ "tsens re-enable after scm call failed (%d)\n",
+ ret);
+ ret = -ENOTRECOVERABLE;
+ goto err_unset;
+ }
+ }
+
+ atomic_set(&in_tsens_reinit, 0);
+
+ /* Notify reinit wa worker */
+ list_for_each_entry(priv_reinit,
+ &tsens_device_list, list) {
+ queue_work(priv_reinit->reinit_wa_worker,
+ &priv_reinit->reinit_wa_notify);
+ }
+ }
+ }
+ }
+
+sensor_read:
/* Valid bit is 0 for 6 AHB clock cycles.
* At 19.2MHz, 1 AHB clock is ~60ns.
* We should enter this loop very, very rarely.
@@ -623,6 +766,12 @@ int get_temp_tsens_valid(const struct tsens_sensor *s, int *temp)
*temp = tsens_hw_to_mC(s, temp_idx);
return 0;
+
+err_unset:
+ atomic_set(&in_tsens_reinit, 0);
+
+err:
+ return ret;
}
int get_temp_common(const struct tsens_sensor *s, int *temp)
@@ -860,6 +1009,14 @@ int __init init_common(struct tsens_priv *priv)
goto err_put_device;
}
+ priv->rf[FIRST_ROUND_COMPLETE] = devm_regmap_field_alloc(dev,
+ priv->tm_map,
+ priv->fields[FIRST_ROUND_COMPLETE]);
+ if (IS_ERR(priv->rf[FIRST_ROUND_COMPLETE])) {
+ ret = PTR_ERR(priv->rf[FIRST_ROUND_COMPLETE]);
+ goto err_put_device;
+ }
+
/* This loop might need changes if enum regfield_ids is reordered */
for (j = LAST_TEMP_0; j <= UP_THRESH_15; j += 16) {
for (i = 0; i < priv->feat->max_sensors; i++) {
@@ -1097,6 +1254,43 @@ static int tsens_register(struct tsens_priv *priv)
return ret;
}
+static void tsens_reinit_worker_notify(struct work_struct *work)
+{
+ int i, ret, temp;
+ struct tsens_irq_data d;
+ struct tsens_priv *priv = container_of(work, struct tsens_priv,
+ reinit_wa_notify);
+
+ for (i = 0; i < priv->num_sensors; i++) {
+ const struct tsens_sensor *s = &priv->sensor[i];
+ u32 hw_id = s->hw_id;
+
+ if (!s->tzd)
+ continue;
+ if (!tsens_threshold_violated(priv, hw_id, &d))
+ continue;
+
+ ret = get_temp_tsens_valid(s, &temp);
+ if (ret) {
+ dev_err(priv->dev, "[%u] %s: error reading sensor\n",
+ hw_id, __func__);
+ continue;
+ }
+
+ tsens_read_irq_state(priv, hw_id, s, &d);
+
+ if ((d.up_thresh < temp) || (d.low_thresh > temp)) {
+ dev_dbg(priv->dev, "[%u] %s: TZ update trigger (%d mC)\n",
+ hw_id, __func__, temp);
+ thermal_zone_device_update(s->tzd,
+ THERMAL_EVENT_UNSPECIFIED);
+ } else {
+ dev_dbg(priv->dev, "[%u] %s: no violation: %d\n",
+ hw_id, __func__, temp);
+ }
+ }
+}
+
static int tsens_probe(struct platform_device *pdev)
{
int ret, i;
@@ -1139,6 +1333,19 @@ static int tsens_probe(struct platform_device *pdev)
priv->dev = dev;
priv->num_sensors = num_sensors;
priv->needs_reinit_wa = data->needs_reinit_wa;
+
+ if (priv->needs_reinit_wa && !qcom_scm_is_available())
+ return -EPROBE_DEFER;
+
+ if (priv->needs_reinit_wa) {
+ priv->reinit_wa_worker = alloc_workqueue("tsens_reinit_work",
+ WQ_HIGHPRI, 0);
+ if (!priv->reinit_wa_worker)
+ return -ENOMEM;
+
+ INIT_WORK(&priv->reinit_wa_notify, tsens_reinit_worker_notify);
+ }
+
priv->ops = data->ops;
for (i = 0; i < priv->num_sensors; i++) {
if (data->hw_ids)
@@ -1151,13 +1358,15 @@ static int tsens_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, priv);
- if (!priv->ops || !priv->ops->init || !priv->ops->get_temp)
- return -EINVAL;
+ if (!priv->ops || !priv->ops->init || !priv->ops->get_temp) {
+ ret = -EINVAL;
+ goto free_wq;
+ }
ret = priv->ops->init(priv);
if (ret < 0) {
dev_err(dev, "%s: init failed\n", __func__);
- return ret;
+ goto free_wq;
}
if (priv->ops->calibrate) {
@@ -1165,11 +1374,23 @@ static int tsens_probe(struct platform_device *pdev)
if (ret < 0) {
if (ret != -EPROBE_DEFER)
dev_err(dev, "%s: calibration failed\n", __func__);
- return ret;
+
+ goto free_wq;
}
}
- return tsens_register(priv);
+ ret = tsens_register(priv);
+ if (ret < 0) {
+ dev_err(dev, "%s: registration failed\n", __func__);
+ goto free_wq;
+ }
+
+ list_add_tail(&priv->list, &tsens_device_list);
+ return 0;
+
+free_wq:
+ destroy_workqueue(priv->reinit_wa_worker);
+ return ret;
}
static int tsens_remove(struct platform_device *pdev)
@@ -1181,6 +1402,8 @@ static int tsens_remove(struct platform_device *pdev)
if (priv->ops->disable)
priv->ops->disable(priv);
+ destroy_workqueue(priv->reinit_wa_worker);
+
return 0;
}
diff --git a/drivers/thermal/qcom/tsens.h b/drivers/thermal/qcom/tsens.h
index 48a7bda902c1..c7279a50cf9b 100644
--- a/drivers/thermal/qcom/tsens.h
+++ b/drivers/thermal/qcom/tsens.h
@@ -14,6 +14,7 @@
#define SLOPE_FACTOR 1000
#define SLOPE_DEFAULT 3200
#define TIMEOUT_US 100
+#define RESET_TIMEOUT_MS 2
#define THRESHOLD_MAX_ADC_CODE 0x3ff
#define THRESHOLD_MIN_ADC_CODE 0x0
@@ -167,6 +168,7 @@ enum regfield_ids {
/* ----- TM ------ */
/* TRDY */
TRDY,
+ FIRST_ROUND_COMPLETE,
/* INTERRUPT ENABLE */
INT_EN, /* v2+ has separate enables for crit, upper and lower irq */
/* STATUS */
@@ -565,6 +567,10 @@ struct tsens_priv {
struct regmap *srot_map;
u32 tm_offset;
bool needs_reinit_wa;
+ struct workqueue_struct *reinit_wa_worker;
+ struct work_struct reinit_wa_notify;
+
+ struct list_head list;
/* lock for upper/lower threshold interrupts */
spinlock_t ul_lock;
--
2.35.3
Hi Bhupesh,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on rafael-pm/thermal]
[also build test ERROR on linus/master v5.19-rc5 next-20220707]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Bhupesh-Sharma/Add-support-for-tsens-controller-reinit-via-trustzone/20220701-230113
base: https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git thermal
config: arm64-randconfig-r015-20220707 (https://download.01.org/0day-ci/archive/20220708/[email protected]/config)
compiler: aarch64-linux-gcc (GCC) 11.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/32929e13eb338e76b714bb8b4805899e2857734f
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Bhupesh-Sharma/Add-support-for-tsens-controller-reinit-via-trustzone/20220701-230113
git checkout 32929e13eb338e76b714bb8b4805899e2857734f
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=arm64 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>
All errors (new ones prefixed by >>):
aarch64-linux-ld: Unexpected GOT/PLT entries detected!
aarch64-linux-ld: Unexpected run-time procedure linkages detected!
aarch64-linux-ld: ID map text too big or misaligned
aarch64-linux-ld: drivers/thermal/qcom/tsens.o: in function `tsens_probe':
>> drivers/thermal/qcom/tsens.c:1337: undefined reference to `qcom_scm_is_available'
aarch64-linux-ld: drivers/thermal/qcom/tsens.o: in function `get_temp_tsens_valid':
>> drivers/thermal/qcom/tsens.c:714: undefined reference to `qcom_scm_tsens_reinit'
vim +1337 drivers/thermal/qcom/tsens.c
1293
1294 static int tsens_probe(struct platform_device *pdev)
1295 {
1296 int ret, i;
1297 struct device *dev;
1298 struct device_node *np;
1299 struct tsens_priv *priv;
1300 const struct tsens_plat_data *data;
1301 const struct of_device_id *id;
1302 u32 num_sensors;
1303
1304 if (pdev->dev.of_node)
1305 dev = &pdev->dev;
1306 else
1307 dev = pdev->dev.parent;
1308
1309 np = dev->of_node;
1310
1311 id = of_match_node(tsens_table, np);
1312 if (id)
1313 data = id->data;
1314 else
1315 data = &data_8960;
1316
1317 num_sensors = data->num_sensors;
1318
1319 if (np)
1320 of_property_read_u32(np, "#qcom,sensors", &num_sensors);
1321
1322 if (num_sensors <= 0) {
1323 dev_err(dev, "%s: invalid number of sensors\n", __func__);
1324 return -EINVAL;
1325 }
1326
1327 priv = devm_kzalloc(dev,
1328 struct_size(priv, sensor, num_sensors),
1329 GFP_KERNEL);
1330 if (!priv)
1331 return -ENOMEM;
1332
1333 priv->dev = dev;
1334 priv->num_sensors = num_sensors;
1335 priv->needs_reinit_wa = data->needs_reinit_wa;
1336
> 1337 if (priv->needs_reinit_wa && !qcom_scm_is_available())
1338 return -EPROBE_DEFER;
1339
1340 if (priv->needs_reinit_wa) {
1341 priv->reinit_wa_worker = alloc_workqueue("tsens_reinit_work",
1342 WQ_HIGHPRI, 0);
1343 if (!priv->reinit_wa_worker)
1344 return -ENOMEM;
1345
1346 INIT_WORK(&priv->reinit_wa_notify, tsens_reinit_worker_notify);
1347 }
1348
1349 priv->ops = data->ops;
1350 for (i = 0; i < priv->num_sensors; i++) {
1351 if (data->hw_ids)
1352 priv->sensor[i].hw_id = data->hw_ids[i];
1353 else
1354 priv->sensor[i].hw_id = i;
1355 }
1356 priv->feat = data->feat;
1357 priv->fields = data->fields;
1358
1359 platform_set_drvdata(pdev, priv);
1360
1361 if (!priv->ops || !priv->ops->init || !priv->ops->get_temp) {
1362 ret = -EINVAL;
1363 goto free_wq;
1364 }
1365
1366 ret = priv->ops->init(priv);
1367 if (ret < 0) {
1368 dev_err(dev, "%s: init failed\n", __func__);
1369 goto free_wq;
1370 }
1371
1372 if (priv->ops->calibrate) {
1373 ret = priv->ops->calibrate(priv);
1374 if (ret < 0) {
1375 if (ret != -EPROBE_DEFER)
1376 dev_err(dev, "%s: calibration failed\n", __func__);
1377
1378 goto free_wq;
1379 }
1380 }
1381
1382 ret = tsens_register(priv);
1383 if (ret < 0) {
1384 dev_err(dev, "%s: registration failed\n", __func__);
1385 goto free_wq;
1386 }
1387
1388 list_add_tail(&priv->list, &tsens_device_list);
1389 return 0;
1390
1391 free_wq:
1392 destroy_workqueue(priv->reinit_wa_worker);
1393 return ret;
1394 }
1395
--
0-DAY CI Kernel Test Service
https://01.org/lkp
Hi,
On Fri, 8 Jul 2022 at 17:10, kernel test robot <[email protected]> wrote:
>
> Hi Bhupesh,
>
> Thank you for the patch! Yet something to improve:
>
> [auto build test ERROR on rafael-pm/thermal]
> [also build test ERROR on linus/master v5.19-rc5 next-20220707]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Bhupesh-Sharma/Add-support-for-tsens-controller-reinit-via-trustzone/20220701-230113
> base: https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git thermal
> config: arm64-randconfig-r015-20220707 (https://download.01.org/0day-ci/archive/20220708/[email protected]/config)
> compiler: aarch64-linux-gcc (GCC) 11.3.0
> reproduce (this is a W=1 build):
> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # https://github.com/intel-lab-lkp/linux/commit/32929e13eb338e76b714bb8b4805899e2857734f
> git remote add linux-review https://github.com/intel-lab-lkp/linux
> git fetch --no-tags linux-review Bhupesh-Sharma/Add-support-for-tsens-controller-reinit-via-trustzone/20220701-230113
> git checkout 32929e13eb338e76b714bb8b4805899e2857734f
> # save the config file
> mkdir build_dir && cp config build_dir/.config
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=arm64 SHELL=/bin/bash
>
> If you fix the issue, kindly add following tag where applicable
> Reported-by: kernel test robot <[email protected]>
>
> All errors (new ones prefixed by >>):
>
> aarch64-linux-ld: Unexpected GOT/PLT entries detected!
> aarch64-linux-ld: Unexpected run-time procedure linkages detected!
> aarch64-linux-ld: ID map text too big or misaligned
> aarch64-linux-ld: drivers/thermal/qcom/tsens.o: in function `tsens_probe':
> >> drivers/thermal/qcom/tsens.c:1337: undefined reference to `qcom_scm_is_available'
> aarch64-linux-ld: drivers/thermal/qcom/tsens.o: in function `get_temp_tsens_valid':
> >> drivers/thermal/qcom/tsens.c:714: undefined reference to `qcom_scm_tsens_reinit'
>
>
> vim +1337 drivers/thermal/qcom/tsens.c
>
> 1293
> 1294 static int tsens_probe(struct platform_device *pdev)
> 1295 {
> 1296 int ret, i;
> 1297 struct device *dev;
> 1298 struct device_node *np;
> 1299 struct tsens_priv *priv;
> 1300 const struct tsens_plat_data *data;
> 1301 const struct of_device_id *id;
> 1302 u32 num_sensors;
> 1303
> 1304 if (pdev->dev.of_node)
> 1305 dev = &pdev->dev;
> 1306 else
> 1307 dev = pdev->dev.parent;
> 1308
> 1309 np = dev->of_node;
> 1310
> 1311 id = of_match_node(tsens_table, np);
> 1312 if (id)
> 1313 data = id->data;
> 1314 else
> 1315 data = &data_8960;
> 1316
> 1317 num_sensors = data->num_sensors;
> 1318
> 1319 if (np)
> 1320 of_property_read_u32(np, "#qcom,sensors", &num_sensors);
> 1321
> 1322 if (num_sensors <= 0) {
> 1323 dev_err(dev, "%s: invalid number of sensors\n", __func__);
> 1324 return -EINVAL;
> 1325 }
> 1326
> 1327 priv = devm_kzalloc(dev,
> 1328 struct_size(priv, sensor, num_sensors),
> 1329 GFP_KERNEL);
> 1330 if (!priv)
> 1331 return -ENOMEM;
> 1332
> 1333 priv->dev = dev;
> 1334 priv->num_sensors = num_sensors;
> 1335 priv->needs_reinit_wa = data->needs_reinit_wa;
> 1336
> > 1337 if (priv->needs_reinit_wa && !qcom_scm_is_available())
> 1338 return -EPROBE_DEFER;
> 1339
> 1340 if (priv->needs_reinit_wa) {
> 1341 priv->reinit_wa_worker = alloc_workqueue("tsens_reinit_work",
> 1342 WQ_HIGHPRI, 0);
> 1343 if (!priv->reinit_wa_worker)
> 1344 return -ENOMEM;
> 1345
> 1346 INIT_WORK(&priv->reinit_wa_notify, tsens_reinit_worker_notify);
> 1347 }
> 1348
> 1349 priv->ops = data->ops;
> 1350 for (i = 0; i < priv->num_sensors; i++) {
> 1351 if (data->hw_ids)
> 1352 priv->sensor[i].hw_id = data->hw_ids[i];
> 1353 else
> 1354 priv->sensor[i].hw_id = i;
> 1355 }
> 1356 priv->feat = data->feat;
> 1357 priv->fields = data->fields;
> 1358
> 1359 platform_set_drvdata(pdev, priv);
> 1360
> 1361 if (!priv->ops || !priv->ops->init || !priv->ops->get_temp) {
> 1362 ret = -EINVAL;
> 1363 goto free_wq;
> 1364 }
> 1365
> 1366 ret = priv->ops->init(priv);
> 1367 if (ret < 0) {
> 1368 dev_err(dev, "%s: init failed\n", __func__);
> 1369 goto free_wq;
> 1370 }
> 1371
> 1372 if (priv->ops->calibrate) {
> 1373 ret = priv->ops->calibrate(priv);
> 1374 if (ret < 0) {
> 1375 if (ret != -EPROBE_DEFER)
> 1376 dev_err(dev, "%s: calibration failed\n", __func__);
> 1377
> 1378 goto free_wq;
> 1379 }
> 1380 }
> 1381
> 1382 ret = tsens_register(priv);
> 1383 if (ret < 0) {
> 1384 dev_err(dev, "%s: registration failed\n", __func__);
> 1385 goto free_wq;
> 1386 }
> 1387
> 1388 list_add_tail(&priv->list, &tsens_device_list);
> 1389 return 0;
> 1390
> 1391 free_wq:
> 1392 destroy_workqueue(priv->reinit_wa_worker);
> 1393 return ret;
> 1394 }
> 1395
Thanks, I will fix this in v2.
Regards,
Bhupesh
On 1.07.2022 16:58, Bhupesh Sharma wrote:
> Since for some QCoM tsens controllers, its suggested to
> monitor the controller health periodically and in case an
> issue is detected, to re-initialize the tsens controller
> via trustzone, add the support for the same in the
> qcom tsens driver.
>
> Note that Once the tsens controller is reset using scm call,
> all SROT and TM region registers will enter the reset mode.
>
> While all the SROT registers will be re-programmed and
> re-enabled in trustzone prior to the scm call exit, the TM
> region registers will not re-initialized in trustzone and thus
> need to be handled by the tsens driver.
>
> Cc: Amit Kucheria <[email protected]>
> Cc: Thara Gopinath <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Bhupesh Sharma <[email protected]>
> Reported-by: kernel test robot <[email protected]>
> ---
Hi, I think this should be also checked and applied on init. This
seems required for at least SM6375, as the controller starts (or
well, doesn't start...) in an unknown state and the driver does
not like it, as the TSENS_EN indicates it is disabled.
Downstream runs this right at probe..
Konrad
Hi Konrad,
On 7/15/22 8:26 PM, Konrad Dybcio <[email protected]> wrote:
>
>
> On 1.07.2022 16:58, Bhupesh Sharma wrote:
> > Since for some QCoM tsens controllers, its suggested to
> > monitor the controller health periodically and in case an
> > issue is detected, to re-initialize the tsens controller
> > via trustzone, add the support for the same in the
> > qcom tsens driver.
> >
> > Note that Once the tsens controller is reset using scm call,
> > all SROT and TM region registers will enter the reset mode.
> >
> > While all the SROT registers will be re-programmed and
> > re-enabled in trustzone prior to the scm call exit, the TM
> > region registers will not re-initialized in trustzone and thus
> > need to be handled by the tsens driver.
> >
> > Cc: Amit Kucheria <[email protected]>
> > Cc: Thara Gopinath <[email protected]>
> > Cc: [email protected]
> > Cc: [email protected]
> > Signed-off-by: Bhupesh Sharma <[email protected]>
> > Reported-by: kernel test robot <[email protected]>
> > ---
> Hi, I think this should be also checked and applied on init. This
> seems required for at least SM6375, as the controller starts (or
> well, doesn't start...) in an unknown state and the driver does
> not like it, as the TSENS_EN indicates it is disabled.
> Downstream runs this right at probe..
Hmm.. very interesting. I was not aware of the SM6375 case, as for SM8150
the controller starts in a valid state but may require reinit during operation.
So, I did not use the downstream approach to do it right at _probe() and then
later while get_temp() is called.
Let me add that in v2. BTW do you want me to set the need_reinit_wa as true
for SM6375 as well, or would you like to add that with a followup-patch ?
Regards,
Bhupesh
On Fri 01 Jul 09:58 CDT 2022, Bhupesh Sharma wrote:
> Since for some QCoM tsens controllers, its suggested to
> monitor the controller health periodically and in case an
> issue is detected, to re-initialize the tsens controller
> via trustzone, add the support for the same in the
> qcom tsens driver.
>
> Note that Once the tsens controller is reset using scm call,
> all SROT and TM region registers will enter the reset mode.
>
> While all the SROT registers will be re-programmed and
> re-enabled in trustzone prior to the scm call exit, the TM
> region registers will not re-initialized in trustzone and thus
> need to be handled by the tsens driver.
>
> Cc: Amit Kucheria <[email protected]>
> Cc: Thara Gopinath <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Bhupesh Sharma <[email protected]>
> ---
> drivers/thermal/qcom/tsens-v2.c | 3 +
> drivers/thermal/qcom/tsens.c | 237 +++++++++++++++++++++++++++++++-
> drivers/thermal/qcom/tsens.h | 6 +
> 3 files changed, 239 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/thermal/qcom/tsens-v2.c b/drivers/thermal/qcom/tsens-v2.c
> index 61d38a56d29a..9bb542f16482 100644
> --- a/drivers/thermal/qcom/tsens-v2.c
> +++ b/drivers/thermal/qcom/tsens-v2.c
> @@ -88,6 +88,9 @@ static const struct reg_field tsens_v2_regfields[MAX_REGFIELDS] = {
>
> /* TRDY: 1=ready, 0=in progress */
> [TRDY] = REG_FIELD(TM_TRDY_OFF, 0, 0),
> +
> + /* FIRST_ROUND_COMPLETE: 1=complete, 0=not complete */
> + [FIRST_ROUND_COMPLETE] = REG_FIELD(TM_TRDY_OFF, 3, 3),
> };
>
> static const struct tsens_ops ops_generic_v2 = {
> diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
> index 97f4d4454f20..28d42ae0eb47 100644
> --- a/drivers/thermal/qcom/tsens.c
> +++ b/drivers/thermal/qcom/tsens.c
> @@ -7,6 +7,7 @@
> #include <linux/debugfs.h>
> #include <linux/err.h>
> #include <linux/io.h>
> +#include <linux/qcom_scm.h>
> #include <linux/module.h>
> #include <linux/nvmem-consumer.h>
> #include <linux/of.h>
> @@ -21,6 +22,8 @@
> #include "../thermal_hwmon.h"
> #include "tsens.h"
>
> +LIST_HEAD(tsens_device_list);
> +
> /**
> * struct tsens_irq_data - IRQ status and temperature violations
> * @up_viol: upper threshold violated
> @@ -594,19 +597,159 @@ static void tsens_disable_irq(struct tsens_priv *priv)
> regmap_field_write(priv->rf[INT_EN], 0);
> }
>
> +static int tsens_reenable_hw_after_scm(struct tsens_priv *priv)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&priv->ul_lock, flags);
> +
> + /* Re-enable watchdog, unmask the bark and
> + * disable cycle completion monitoring.
> + */
> + regmap_field_write(priv->rf[WDOG_BARK_CLEAR], 1);
> + regmap_field_write(priv->rf[WDOG_BARK_CLEAR], 0);
> + regmap_field_write(priv->rf[WDOG_BARK_MASK], 0);
> + regmap_field_write(priv->rf[CC_MON_MASK], 1);
> +
> + /* Re-enable interrupts */
> + tsens_enable_irq(priv);
> +
> + spin_unlock_irqrestore(&priv->ul_lock, flags);
> +
> + return 0;
> +}
> +
> int get_temp_tsens_valid(const struct tsens_sensor *s, int *temp)
> {
> - struct tsens_priv *priv = s->priv;
> + struct tsens_priv *priv = s->priv, *priv_reinit;
> int hw_id = s->hw_id;
> u32 temp_idx = LAST_TEMP_0 + hw_id;
> u32 valid_idx = VALID_0 + hw_id;
> u32 valid;
> - int ret;
> + int ret, trdy, first_round, tsens_ret, sw_reg;
> + unsigned long timeout;
> + static atomic_t in_tsens_reinit;
This is a global state, I suggest you move it to the top of the file to
make that obvious.
>
> /* VER_0 doesn't have VALID bit */
> if (tsens_version(priv) == VER_0)
> goto get_temp;
>
> + /* For some tsens controllers, its suggested to
> + * monitor the controller health periodically
> + * and in case an issue is detected to reinit
> + * tsens controller via trustzone.
> + */
> + if (priv->needs_reinit_wa) {
I would suggest that you move all this entire block to a separate
function, maybe something:
int tsens_health_check_and_reinit()
> + /* First check if TRDY is SET */
> + timeout = jiffies + usecs_to_jiffies(TIMEOUT_US);
> + do {
> + ret = regmap_field_read(priv->rf[TRDY], &trdy);
> + if (ret)
> + goto err;
> + if (!trdy)
> + continue;
> + } while (time_before(jiffies, timeout));
This looks like a regmap_field_read()
> +
> + if (!trdy) {
> + ret = regmap_field_read(priv->rf[FIRST_ROUND_COMPLETE], &first_round);
> + if (ret)
> + goto err;
> +
> + if (!first_round) {
> + if (atomic_read(&in_tsens_reinit)) {
> + dev_dbg(priv->dev, "tsens re-init is in progress\n");
> + ret = -EAGAIN;
Is it preferred to return -EAGAIN here, over just serializing this whole
thing using a mutex?
> + goto err;
> + }
> +
> + /* Wait for 2 ms for tsens controller to recover */
> + timeout = jiffies + msecs_to_jiffies(RESET_TIMEOUT_MS);
> + do {
> + ret = regmap_field_read(priv->rf[FIRST_ROUND_COMPLETE],
> + &first_round);
> + if (ret)
> + goto err;
> +
> + if (first_round) {
> + dev_dbg(priv->dev, "tsens controller recovered\n");
> + goto sensor_read;
> + }
> + } while (time_before(jiffies, timeout));
> +
> + /*
> + * tsens controller did not recover,
> + * proceed with SCM call to re-init it
> + */
> + if (atomic_read(&in_tsens_reinit)) {
> + dev_dbg(priv->dev, "tsens re-init is in progress\n");
> + ret = -EAGAIN;
> + goto err;
> + }
> +
> + atomic_set(&in_tsens_reinit, 1);
Afaict nothing prevents two different processes to run the remainder of
the recovery in parallel. I think you need some locking here.
> +
> + /*
> + * Invoke scm call only if SW register write is
> + * reflecting in controller. Try it for 2 ms.
> + */
> + timeout = jiffies + msecs_to_jiffies(RESET_TIMEOUT_MS);
> + do {
> + ret = regmap_field_write(priv->rf[INT_EN], BIT(2));
Do we know what BIT(2) is and would we be allowed to give it a define?
> + if (ret)
> + goto err_unset;
> +
> + ret = regmap_field_read(priv->rf[INT_EN], &sw_reg);
> + if (ret)
> + goto err_unset;
> +
> + if (!(sw_reg & BIT(2)))
> + continue;
Why not:
} while (sw_reg & BIT(2) && time_before(jiffies, timeout));
> + } while (time_before(jiffies, timeout));
> +
> + if (!(sw_reg & BIT(2))) {
> + ret = -ENOTRECOVERABLE;
> + goto err_unset;
> + }
> +
> + ret = qcom_scm_tsens_reinit(&tsens_ret);
> + if (ret || tsens_ret) {
> + dev_err(priv->dev, "tsens reinit scm call failed (%d : %d)\n",
> + ret, tsens_ret);
> + if (tsens_ret)
> + ret = -ENOTRECOVERABLE;
If that's the api for the SCM, feel free to move the -ENOTRECOVERABLE to
the scm function.
> +
> + goto err_unset;
> + }
> +
> + /* After the SCM call, we need to re-enable
> + * the interrupts and also set active threshold
> + * for each sensor.
> + */
> + list_for_each_entry(priv_reinit,
> + &tsens_device_list, list) {
> + ret = tsens_reenable_hw_after_scm(priv_reinit);
> + if (ret) {
> + dev_err(priv->dev,
> + "tsens re-enable after scm call failed (%d)\n",
> + ret);
> + ret = -ENOTRECOVERABLE;
> + goto err_unset;
> + }
> + }
> +
> + atomic_set(&in_tsens_reinit, 0);
> +
> + /* Notify reinit wa worker */
> + list_for_each_entry(priv_reinit,
Do you need to loop twice over the tsens instances?
> + &tsens_device_list, list) {
> + queue_work(priv_reinit->reinit_wa_worker,
> + &priv_reinit->reinit_wa_notify);
> + }
> + }
> + }
> + }
> +
> +sensor_read:
> /* Valid bit is 0 for 6 AHB clock cycles.
> * At 19.2MHz, 1 AHB clock is ~60ns.
> * We should enter this loop very, very rarely.
> @@ -623,6 +766,12 @@ int get_temp_tsens_valid(const struct tsens_sensor *s, int *temp)
> *temp = tsens_hw_to_mC(s, temp_idx);
>
> return 0;
> +
> +err_unset:
> + atomic_set(&in_tsens_reinit, 0);
> +
> +err:
> + return ret;
> }
>
> int get_temp_common(const struct tsens_sensor *s, int *temp)
> @@ -860,6 +1009,14 @@ int __init init_common(struct tsens_priv *priv)
> goto err_put_device;
> }
>
> + priv->rf[FIRST_ROUND_COMPLETE] = devm_regmap_field_alloc(dev,
> + priv->tm_map,
> + priv->fields[FIRST_ROUND_COMPLETE]);
> + if (IS_ERR(priv->rf[FIRST_ROUND_COMPLETE])) {
> + ret = PTR_ERR(priv->rf[FIRST_ROUND_COMPLETE]);
> + goto err_put_device;
> + }
> +
> /* This loop might need changes if enum regfield_ids is reordered */
> for (j = LAST_TEMP_0; j <= UP_THRESH_15; j += 16) {
> for (i = 0; i < priv->feat->max_sensors; i++) {
> @@ -1097,6 +1254,43 @@ static int tsens_register(struct tsens_priv *priv)
> return ret;
> }
>
> +static void tsens_reinit_worker_notify(struct work_struct *work)
> +{
> + int i, ret, temp;
priv->num_sensors is unsigned, so i could be too.
> + struct tsens_irq_data d;
> + struct tsens_priv *priv = container_of(work, struct tsens_priv,
> + reinit_wa_notify);
> +
> + for (i = 0; i < priv->num_sensors; i++) {
> + const struct tsens_sensor *s = &priv->sensor[i];
> + u32 hw_id = s->hw_id;
> +
> + if (!s->tzd)
> + continue;
> + if (!tsens_threshold_violated(priv, hw_id, &d))
> + continue;
> +
> + ret = get_temp_tsens_valid(s, &temp);
> + if (ret) {
> + dev_err(priv->dev, "[%u] %s: error reading sensor\n",
> + hw_id, __func__);
Please express yourself in the message, instead of using __func__.
> + continue;
> + }
> +
> + tsens_read_irq_state(priv, hw_id, s, &d);
> +
> + if ((d.up_thresh < temp) || (d.low_thresh > temp)) {
> + dev_dbg(priv->dev, "[%u] %s: TZ update trigger (%d mC)\n",
> + hw_id, __func__, temp);
> + thermal_zone_device_update(s->tzd,
> + THERMAL_EVENT_UNSPECIFIED);
This is just 86 chars long, no need to wrap the line.
> + } else {
> + dev_dbg(priv->dev, "[%u] %s: no violation: %d\n",
Double space after ':'
> + hw_id, __func__, temp);
> + }
> + }
> +}
> +
> static int tsens_probe(struct platform_device *pdev)
> {
> int ret, i;
> @@ -1139,6 +1333,19 @@ static int tsens_probe(struct platform_device *pdev)
> priv->dev = dev;
> priv->num_sensors = num_sensors;
> priv->needs_reinit_wa = data->needs_reinit_wa;
> +
> + if (priv->needs_reinit_wa && !qcom_scm_is_available())
> + return -EPROBE_DEFER;
> +
> + if (priv->needs_reinit_wa) {
> + priv->reinit_wa_worker = alloc_workqueue("tsens_reinit_work",
> + WQ_HIGHPRI, 0);
Do you really need your own work queue for this, how about just
scheduling the work on system_highpri_wq?
Regards,
Bjorn
On 18.07.2022 08:34, [email protected] wrote:
> Hi Konrad,
>
> On 7/15/22 8:26 PM, Konrad Dybcio <[email protected]> wrote:
>>
>>
>> On 1.07.2022 16:58, Bhupesh Sharma wrote:
>> > Since for some QCoM tsens controllers, its suggested to
>> > monitor the controller health periodically and in case an
>> > issue is detected, to re-initialize the tsens controller
>> > via trustzone, add the support for the same in the
>> > qcom tsens driver.
>> >
>> > Note that Once the tsens controller is reset using scm call,
>> > all SROT and TM region registers will enter the reset mode.
>> >
>> > While all the SROT registers will be re-programmed and
>> > re-enabled in trustzone prior to the scm call exit, the TM
>> > region registers will not re-initialized in trustzone and thus
>> > need to be handled by the tsens driver.
>> >
>> > Cc: Amit Kucheria <[email protected]>
>> > Cc: Thara Gopinath <[email protected]>
>> > Cc: [email protected]
>> > Cc: [email protected]
>> > Signed-off-by: Bhupesh Sharma <[email protected]>
>> > Reported-by: kernel test robot <[email protected]>
>> > ---
>> Hi, I think this should be also checked and applied on init. This
>> seems required for at least SM6375, as the controller starts (or
>> well, doesn't start...) in an unknown state and the driver does
>> not like it, as the TSENS_EN indicates it is disabled.
>> Downstream runs this right at probe..
>
> Hmm.. very interesting. I was not aware of the SM6375 case, as for SM8150
> the controller starts in a valid state but may require reinit during operation.
>
> So, I did not use the downstream approach to do it right at _probe() and then
> later while get_temp() is called.
>
> Let me add that in v2. BTW do you want me to set the need_reinit_wa as true
> for SM6375 as well, or would you like to add that with a followup-patch ?
Please set it, I'll happily test it!
Konrad
>
> Regards,
> Bhupesh
On 7/19/22 4:09 PM, Konrad Dybcio wrote:
>
>
> On 18.07.2022 08:34, [email protected] wrote:
>> Hi Konrad,
>>
>> On 7/15/22 8:26 PM, Konrad Dybcio <[email protected]> wrote:
>>>
>>>
>>> On 1.07.2022 16:58, Bhupesh Sharma wrote:
>>>> Since for some QCoM tsens controllers, its suggested to
>>>> monitor the controller health periodically and in case an
>>>> issue is detected, to re-initialize the tsens controller
>>>> via trustzone, add the support for the same in the
>>>> qcom tsens driver.
>>>>
>>>> Note that Once the tsens controller is reset using scm call,
>>>> all SROT and TM region registers will enter the reset mode.
>>>>
>>>> While all the SROT registers will be re-programmed and
>>>> re-enabled in trustzone prior to the scm call exit, the TM
>>>> region registers will not re-initialized in trustzone and thus
>>>> need to be handled by the tsens driver.
>>>>
>>>> Cc: Amit Kucheria <[email protected]>
>>>> Cc: Thara Gopinath <[email protected]>
>>>> Cc: [email protected]
>>>> Cc: [email protected]
>>>> Signed-off-by: Bhupesh Sharma <[email protected]>
>>>> Reported-by: kernel test robot <[email protected]>
>>>> ---
>>> Hi, I think this should be also checked and applied on init. This
>>> seems required for at least SM6375, as the controller starts (or
>>> well, doesn't start...) in an unknown state and the driver does
>>> not like it, as the TSENS_EN indicates it is disabled.
>>> Downstream runs this right at probe..
>>
>> Hmm.. very interesting. I was not aware of the SM6375 case, as for SM8150
>> the controller starts in a valid state but may require reinit during operation.
>>
>> So, I did not use the downstream approach to do it right at _probe() and then
>> later while get_temp() is called.
>>
>> Let me add that in v2. BTW do you want me to set the need_reinit_wa as true
>> for SM6375 as well, or would you like to add that with a followup-patch ?
> Please set it, I'll happily test it!
Thanks. Will share v2 shortly.
Regards,
Bhupesh
Hi Bjorn,
Thanks for your review.
On 7/19/22 9:00 AM, Bjorn Andersson wrote:
> On Fri 01 Jul 09:58 CDT 2022, Bhupesh Sharma wrote:
>
>> Since for some QCoM tsens controllers, its suggested to
>> monitor the controller health periodically and in case an
>> issue is detected, to re-initialize the tsens controller
>> via trustzone, add the support for the same in the
>> qcom tsens driver.
>>
>> Note that Once the tsens controller is reset using scm call,
>> all SROT and TM region registers will enter the reset mode.
>>
>> While all the SROT registers will be re-programmed and
>> re-enabled in trustzone prior to the scm call exit, the TM
>> region registers will not re-initialized in trustzone and thus
>> need to be handled by the tsens driver.
>>
>> Cc: Amit Kucheria <[email protected]>
>> Cc: Thara Gopinath <[email protected]>
>> Cc: [email protected]
>> Cc: [email protected]
>> Signed-off-by: Bhupesh Sharma <[email protected]>
>> ---
>> drivers/thermal/qcom/tsens-v2.c | 3 +
>> drivers/thermal/qcom/tsens.c | 237 +++++++++++++++++++++++++++++++-
>> drivers/thermal/qcom/tsens.h | 6 +
>> 3 files changed, 239 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/thermal/qcom/tsens-v2.c b/drivers/thermal/qcom/tsens-v2.c
>> index 61d38a56d29a..9bb542f16482 100644
>> --- a/drivers/thermal/qcom/tsens-v2.c
>> +++ b/drivers/thermal/qcom/tsens-v2.c
>> @@ -88,6 +88,9 @@ static const struct reg_field tsens_v2_regfields[MAX_REGFIELDS] = {
>>
>> /* TRDY: 1=ready, 0=in progress */
>> [TRDY] = REG_FIELD(TM_TRDY_OFF, 0, 0),
>> +
>> + /* FIRST_ROUND_COMPLETE: 1=complete, 0=not complete */
>> + [FIRST_ROUND_COMPLETE] = REG_FIELD(TM_TRDY_OFF, 3, 3),
>> };
>>
>> static const struct tsens_ops ops_generic_v2 = {
>> diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
>> index 97f4d4454f20..28d42ae0eb47 100644
>> --- a/drivers/thermal/qcom/tsens.c
>> +++ b/drivers/thermal/qcom/tsens.c
>> @@ -7,6 +7,7 @@
>> #include <linux/debugfs.h>
>> #include <linux/err.h>
>> #include <linux/io.h>
>> +#include <linux/qcom_scm.h>
>> #include <linux/module.h>
>> #include <linux/nvmem-consumer.h>
>> #include <linux/of.h>
>> @@ -21,6 +22,8 @@
>> #include "../thermal_hwmon.h"
>> #include "tsens.h"
>>
>> +LIST_HEAD(tsens_device_list);
>> +
>> /**
>> * struct tsens_irq_data - IRQ status and temperature violations
>> * @up_viol: upper threshold violated
>> @@ -594,19 +597,159 @@ static void tsens_disable_irq(struct tsens_priv *priv)
>> regmap_field_write(priv->rf[INT_EN], 0);
>> }
>>
>> +static int tsens_reenable_hw_after_scm(struct tsens_priv *priv)
>> +{
>> + unsigned long flags;
>> +
>> + spin_lock_irqsave(&priv->ul_lock, flags);
>> +
>> + /* Re-enable watchdog, unmask the bark and
>> + * disable cycle completion monitoring.
>> + */
>> + regmap_field_write(priv->rf[WDOG_BARK_CLEAR], 1);
>> + regmap_field_write(priv->rf[WDOG_BARK_CLEAR], 0);
>> + regmap_field_write(priv->rf[WDOG_BARK_MASK], 0);
>> + regmap_field_write(priv->rf[CC_MON_MASK], 1);
>> +
>> + /* Re-enable interrupts */
>> + tsens_enable_irq(priv);
>> +
>> + spin_unlock_irqrestore(&priv->ul_lock, flags);
>> +
>> + return 0;
>> +}
>> +
>> int get_temp_tsens_valid(const struct tsens_sensor *s, int *temp)
>> {
>> - struct tsens_priv *priv = s->priv;
>> + struct tsens_priv *priv = s->priv, *priv_reinit;
>> int hw_id = s->hw_id;
>> u32 temp_idx = LAST_TEMP_0 + hw_id;
>> u32 valid_idx = VALID_0 + hw_id;
>> u32 valid;
>> - int ret;
>> + int ret, trdy, first_round, tsens_ret, sw_reg;
>> + unsigned long timeout;
>> + static atomic_t in_tsens_reinit;
>
> This is a global state, I suggest you move it to the top of the file to
> make that obvious.
Sure.
>> /* VER_0 doesn't have VALID bit */
>> if (tsens_version(priv) == VER_0)
>> goto get_temp;
>>
>> + /* For some tsens controllers, its suggested to
>> + * monitor the controller health periodically
>> + * and in case an issue is detected to reinit
>> + * tsens controller via trustzone.
>> + */
>> + if (priv->needs_reinit_wa) {
>
> I would suggest that you move all this entire block to a separate
> function, maybe something:
>
> int tsens_health_check_and_reinit()
Ok. Will fix in v2.
>> + /* First check if TRDY is SET */
>> + timeout = jiffies + usecs_to_jiffies(TIMEOUT_US);
>> + do {
>> + ret = regmap_field_read(priv->rf[TRDY], &trdy);
>> + if (ret)
>> + goto err;
>> + if (!trdy)
>> + continue;
>> + } while (time_before(jiffies, timeout));
>
> This looks like a regmap_field_read()
Not sure, I completely understand this comment. Can you please elaborate?
>> +
>> + if (!trdy) {
>> + ret = regmap_field_read(priv->rf[FIRST_ROUND_COMPLETE], &first_round);
>> + if (ret)
>> + goto err;
>> +
>> + if (!first_round) {
>> + if (atomic_read(&in_tsens_reinit)) {
>> + dev_dbg(priv->dev, "tsens re-init is in progress\n");
>> + ret = -EAGAIN;
>
> Is it preferred to return -EAGAIN here, over just serializing this whole
> thing using a mutex?
Right, using a mutex to serialize here makes sense. Will fix in v2.
>> + goto err;
>> + }
>> +
>> + /* Wait for 2 ms for tsens controller to recover */
>> + timeout = jiffies + msecs_to_jiffies(RESET_TIMEOUT_MS);
>> + do {
>> + ret = regmap_field_read(priv->rf[FIRST_ROUND_COMPLETE],
>> + &first_round);
>> + if (ret)
>> + goto err;
>> +
>> + if (first_round) {
>> + dev_dbg(priv->dev, "tsens controller recovered\n");
>> + goto sensor_read;
>> + }
>> + } while (time_before(jiffies, timeout));
>> +
>> + /*
>> + * tsens controller did not recover,
>> + * proceed with SCM call to re-init it
>> + */
>> + if (atomic_read(&in_tsens_reinit)) {
>> + dev_dbg(priv->dev, "tsens re-init is in progress\n");
>> + ret = -EAGAIN;
>> + goto err;
>> + }
>> +
>> + atomic_set(&in_tsens_reinit, 1);
>
> Afaict nothing prevents two different processes to run the remainder of
> the recovery in parallel. I think you need some locking here.
Ack.
>> +
>> + /*
>> + * Invoke scm call only if SW register write is
>> + * reflecting in controller. Try it for 2 ms.
>> + */
>> + timeout = jiffies + msecs_to_jiffies(RESET_TIMEOUT_MS);
>> + do {
>> + ret = regmap_field_write(priv->rf[INT_EN], BIT(2));
>
> Do we know what BIT(2) is and would we be allowed to give it a define?
Sure, I will add a define here.
>> + if (ret)
>> + goto err_unset;
>> +
>> + ret = regmap_field_read(priv->rf[INT_EN], &sw_reg);
>> + if (ret)
>> + goto err_unset;
>> +
>> + if (!(sw_reg & BIT(2)))
>> + continue;
>
> Why not:
>
> } while (sw_reg & BIT(2) && time_before(jiffies, timeout));
Sure.
>> + } while (time_before(jiffies, timeout));
>> +
>> + if (!(sw_reg & BIT(2))) {
>> + ret = -ENOTRECOVERABLE;
>> + goto err_unset;
>> + }
>> +
>> + ret = qcom_scm_tsens_reinit(&tsens_ret);
>> + if (ret || tsens_ret) {
>> + dev_err(priv->dev, "tsens reinit scm call failed (%d : %d)\n",
>> + ret, tsens_ret);
>> + if (tsens_ret)
>> + ret = -ENOTRECOVERABLE;
>
> If that's the api for the SCM, feel free to move the -ENOTRECOVERABLE to
> the scm function.
Ok, let me check and fix this in v2.
>> +
>> + goto err_unset;
>> + }
>> +
>> + /* After the SCM call, we need to re-enable
>> + * the interrupts and also set active threshold
>> + * for each sensor.
>> + */
>> + list_for_each_entry(priv_reinit,
>> + &tsens_device_list, list) {
>> + ret = tsens_reenable_hw_after_scm(priv_reinit);
>> + if (ret) {
>> + dev_err(priv->dev,
>> + "tsens re-enable after scm call failed (%d)\n",
>> + ret);
>> + ret = -ENOTRECOVERABLE;
>> + goto err_unset;
>> + }
>> + }
>> +
>> + atomic_set(&in_tsens_reinit, 0);
>> +
>> + /* Notify reinit wa worker */
>> + list_for_each_entry(priv_reinit,
>
> Do you need to loop twice over the tsens instances?
>
>> + &tsens_device_list, list) {
>> + queue_work(priv_reinit->reinit_wa_worker,
>> + &priv_reinit->reinit_wa_notify);
>> + }
>> + }
>> + }
>> + }
>> +
>> +sensor_read:
>> /* Valid bit is 0 for 6 AHB clock cycles.
>> * At 19.2MHz, 1 AHB clock is ~60ns.
>> * We should enter this loop very, very rarely.
>> @@ -623,6 +766,12 @@ int get_temp_tsens_valid(const struct tsens_sensor *s, int *temp)
>> *temp = tsens_hw_to_mC(s, temp_idx);
>>
>> return 0;
>> +
>> +err_unset:
>> + atomic_set(&in_tsens_reinit, 0);
>> +
>> +err:
>> + return ret;
>> }
>>
>> int get_temp_common(const struct tsens_sensor *s, int *temp)
>> @@ -860,6 +1009,14 @@ int __init init_common(struct tsens_priv *priv)
>> goto err_put_device;
>> }
>>
>> + priv->rf[FIRST_ROUND_COMPLETE] = devm_regmap_field_alloc(dev,
>> + priv->tm_map,
>> + priv->fields[FIRST_ROUND_COMPLETE]);
>> + if (IS_ERR(priv->rf[FIRST_ROUND_COMPLETE])) {
>> + ret = PTR_ERR(priv->rf[FIRST_ROUND_COMPLETE]);
>> + goto err_put_device;
>> + }
>> +
>> /* This loop might need changes if enum regfield_ids is reordered */
>> for (j = LAST_TEMP_0; j <= UP_THRESH_15; j += 16) {
>> for (i = 0; i < priv->feat->max_sensors; i++) {
>> @@ -1097,6 +1254,43 @@ static int tsens_register(struct tsens_priv *priv)
>> return ret;
>> }
>>
>> +static void tsens_reinit_worker_notify(struct work_struct *work)
>> +{
>> + int i, ret, temp;
>
> priv->num_sensors is unsigned, so i could be too.
Ok.
>> + struct tsens_irq_data d;
>> + struct tsens_priv *priv = container_of(work, struct tsens_priv,
>> + reinit_wa_notify);
>> +
>> + for (i = 0; i < priv->num_sensors; i++) {
>> + const struct tsens_sensor *s = &priv->sensor[i];
>> + u32 hw_id = s->hw_id;
>> +
>> + if (!s->tzd)
>> + continue;
>> + if (!tsens_threshold_violated(priv, hw_id, &d))
>> + continue;
>> +
>> + ret = get_temp_tsens_valid(s, &temp);
>> + if (ret) {
>> + dev_err(priv->dev, "[%u] %s: error reading sensor\n",
>> + hw_id, __func__);
>
> Please express yourself in the message, instead of using __func__.
This was a reuse from the existing tsens irq handler code, but I agree.
Let me fix it in v2.
>> + continue;
>> + }
>> +
>> + tsens_read_irq_state(priv, hw_id, s, &d);
>> +
>> + if ((d.up_thresh < temp) || (d.low_thresh > temp)) {
>> + dev_dbg(priv->dev, "[%u] %s: TZ update trigger (%d mC)\n",
>> + hw_id, __func__, temp);
>> + thermal_zone_device_update(s->tzd,
>> + THERMAL_EVENT_UNSPECIFIED);
>
> This is just 86 chars long, no need to wrap the line.
Sure.
>> + } else {
>> + dev_dbg(priv->dev, "[%u] %s: no violation: %d\n",
>
> Double space after ':'
Again this is a reuse from the existing tsens irq handler code, but I
agree. Let me fix it in v2.
>> + hw_id, __func__, temp);
>> + }
>> + }
>> +}
>> +
>> static int tsens_probe(struct platform_device *pdev)
>> {
>> int ret, i;
>> @@ -1139,6 +1333,19 @@ static int tsens_probe(struct platform_device *pdev)
>> priv->dev = dev;
>> priv->num_sensors = num_sensors;
>> priv->needs_reinit_wa = data->needs_reinit_wa;
>> +
>> + if (priv->needs_reinit_wa && !qcom_scm_is_available())
>> + return -EPROBE_DEFER;
>> +
>> + if (priv->needs_reinit_wa) {
>> + priv->reinit_wa_worker = alloc_workqueue("tsens_reinit_work",
>> + WQ_HIGHPRI, 0);
>
> Do you really need your own work queue for this, how about just
> scheduling the work on system_highpri_wq?
Ok, let me use 'system_highpri_wq' in v2.
Regards,
Bhupesh