Currently a timeout produces only a warning. No information is
available about what led to the timeout.
This series adds changes to print debug information for the TCSes
that are currently in use, the commands in those TCSes and their status.
It also reports the interrupt pending status at the GIC and the completion status.
Lina Iyer (1):
soc: qcom: rpmh-rsc: Log interrupt status when TCS is busy
Raju P.L.S.S.S.N (1):
soc: qcom: rpmh-rsc: Output debug information from RSC
drivers/soc/qcom/rpmh-internal.h | 3 +
drivers/soc/qcom/rpmh-rsc.c | 125 ++++++++++++++++++++++++++++++++++++++-
drivers/soc/qcom/rpmh.c | 11 +++-
3 files changed, 134 insertions(+), 5 deletions(-)
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
From: "Raju P.L.S.S.S.N" <[email protected]>
Output the TCS state when the debug API is invoked. The state of the
TCS, its contents and the IRQ status are presented. Additionally, crash
the system if any TCS is busy, to help with debugging.
Signed-off-by: Raju P.L.S.S.S.N <[email protected]>
Signed-off-by: Lina Iyer <[email protected]>
Signed-off-by: Maulik Shah <[email protected]>
---
drivers/soc/qcom/rpmh-internal.h | 3 +
drivers/soc/qcom/rpmh-rsc.c | 121 ++++++++++++++++++++++++++++++++++++++-
drivers/soc/qcom/rpmh.c | 11 +++-
3 files changed, 130 insertions(+), 5 deletions(-)
diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
index a7bbbb6..15560c63 100644
--- a/drivers/soc/qcom/rpmh-internal.h
+++ b/drivers/soc/qcom/rpmh-internal.h
@@ -92,6 +92,7 @@ struct rpmh_ctrlr {
* @tcs_in_use: s/w state of the TCS
* @lock: synchronize state of the controller
* @client: handle to the DRV's client.
+ * @irq: IRQ at gic
*/
struct rsc_drv {
const char *name;
@@ -102,6 +103,7 @@ struct rsc_drv {
DECLARE_BITMAP(tcs_in_use, MAX_TCS_NR);
spinlock_t lock;
struct rpmh_ctrlr client;
+ int irq;
};
int rpmh_rsc_send_data(struct rsc_drv *drv, const struct tcs_request *msg);
@@ -110,5 +112,6 @@ int rpmh_rsc_write_ctrl_data(struct rsc_drv *drv,
int rpmh_rsc_invalidate(struct rsc_drv *drv);
void rpmh_tx_done(const struct tcs_request *msg, int r);
+void rpmh_rsc_debug(struct rsc_drv *drv, struct completion *compl);
#endif /* __RPM_INTERNAL_H__ */
diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
index e278fc1..3595e4d 100644
--- a/drivers/soc/qcom/rpmh-rsc.c
+++ b/drivers/soc/qcom/rpmh-rsc.c
@@ -61,6 +61,17 @@
#define CMD_STATUS_ISSUED BIT(8)
#define CMD_STATUS_COMPL BIT(16)
+/* Extract the accelerator type field (bits 19:16) from a command address. */
+#define ACCL_TYPE(addr)		(((addr) >> 16) & 0xF)
+#define NR_ACCL_TYPES		3
+#define MAX_RSC_COUNT		2
+
+/* Accelerator names indexed by the ACCL_TYPE() value; entries 0-2 are unnamed. */
+static const char * const accl_str[] = {
+	"", "", "", "CLK", "VREG", "BUS",
+};
+
+/* RSC controllers recorded by rpmh_rsc_probe(), at most MAX_RSC_COUNT of them. */
+static struct rsc_drv *__rsc_drv[MAX_RSC_COUNT];
+static int __rsc_count;
+
static u32 read_tcs_reg(struct rsc_drv *drv, int reg, int tcs_id, int cmd_id)
{
return readl_relaxed(drv->tcs_base + reg + RSC_DRV_TCS_OFFSET * tcs_id +
@@ -410,8 +421,8 @@ int rpmh_rsc_send_data(struct rsc_drv *drv, const struct tcs_request *msg)
do {
ret = tcs_write(drv, msg);
if (ret == -EBUSY) {
- pr_info_ratelimited("TCS Busy, retrying RPMH message send: addr=%#x\n",
- msg->cmds[0].addr);
+ pr_info_ratelimited("DRV:%s TCS Busy, retrying RPMH message send: addr=%#x\n",
+ drv->name, msg->cmds[0].addr);
udelay(10);
}
} while (ret == -EBUSY);
@@ -521,6 +532,108 @@ int rpmh_rsc_write_ctrl_data(struct rsc_drv *drv, const struct tcs_request *msg)
return tcs_ctrl_write(drv, msg);
}
+/*
+ * Find the TCS group that owns a given TCS.
+ *
+ * @drv: RSC controller whose TCS groups are searched
+ * @tcs_id: index of the TCS, tested as a bit position in each group's mask
+ *
+ * Returns the first group whose mask has bit @tcs_id set, or NULL when no
+ * group claims that TCS.
+ */
+static struct tcs_group *get_tcs_from_index(struct rsc_drv *drv, int tcs_id)
+{
+	struct tcs_group *grp;
+
+	for (grp = drv->tcs; grp < drv->tcs + TCS_TYPE_NR; grp++) {
+		if (grp->mask & BIT(tcs_id))
+			return grp;
+	}
+
+	return NULL;
+}
+
+/*
+ * Log the software and hardware state of one TCS and of each command
+ * enabled in it.
+ *
+ * @drv: RSC controller the TCS belongs to
+ * @tcs_id: index of the TCS to report
+ * @accl: bitmap updated in place; the bit selected by ACCL_TYPE() of the
+ *        command address is set for every command that has been issued
+ *        but has not completed
+ *
+ * Nothing is printed when the TCS has no owning group, no request
+ * attached, or no commands enabled.
+ */
+static void print_tcs_info(struct rsc_drv *drv, int tcs_id, unsigned long *accl)
+{
+	struct tcs_group *grp = get_tcs_from_index(drv, tcs_id);
+	const struct tcs_request *req = get_req_from_tcs(drv, tcs_id);
+	bool sw_in_use = test_bit(tcs_id, drv->tcs_in_use);
+	unsigned long enabled_mask;
+	u32 ctrlr_sts, amc_mode, irq_status;
+	int cmd;
+
+	if (!grp || !req)
+		return;
+
+	ctrlr_sts = read_tcs_reg(drv, RSC_DRV_STATUS, tcs_id, 0);
+	enabled_mask = read_tcs_reg(drv, RSC_DRV_CMD_ENABLE, tcs_id, 0);
+	if (!enabled_mask)
+		return;
+
+	amc_mode = read_tcs_reg(drv, RSC_DRV_CONTROL, tcs_id, 0);
+	irq_status = read_tcs_reg(drv, RSC_DRV_IRQ_STATUS, 0, 0);
+
+	pr_warn("Request: tcs-in-use:%s active_tcs=%s(%d) state=%d wait_for_compl=%u]\n",
+		sw_in_use ? "YES" : "NO",
+		(grp->type == ACTIVE_TCS) ? "YES" : "NO",
+		grp->type, req->state, req->wait_for_compl);
+	/* A non-zero status register is reported as an idle controller. */
+	pr_warn("TCS=%d [ctrlr-sts:%s amc-mode:0x%x irq-sts:%s]\n",
+		tcs_id, ctrlr_sts ? "IDLE" : "BUSY", amc_mode,
+		(irq_status & BIT(tcs_id)) ? "COMPLETED" : "PENDING");
+
+	for_each_set_bit(cmd, &enabled_mask, MAX_CMDS_PER_TCS) {
+		u32 addr = read_tcs_reg(drv, RSC_DRV_CMD_ADDR, tcs_id, cmd);
+		u32 data = read_tcs_reg(drv, RSC_DRV_CMD_DATA, tcs_id, cmd);
+		u32 msgid = read_tcs_reg(drv, RSC_DRV_CMD_MSGID, tcs_id, cmd);
+		u32 cmd_sts = read_tcs_reg(drv, RSC_DRV_CMD_STATUS, tcs_id, cmd);
+
+		pr_warn("\tCMD=%d [addr=0x%x data=0x%x hdr=0x%x sts=0x%x enabled=1]\n",
+			cmd, addr, data, msgid, cmd_sts);
+
+		/* Issued but never completed: record which accelerator is stuck. */
+		if ((cmd_sts & CMD_STATUS_ISSUED) && !(cmd_sts & CMD_STATUS_COMPL))
+			*accl |= BIT(ACCL_TYPE(addr));
+	}
+}
+
+/*
+ * Dump the state of an RSC controller and crash if any TCS is still busy.
+ *
+ * @drv: RSC controller to report on
+ * @compl: completion the caller was waiting on; only its done state is read
+ *
+ * Logs every TCS marked in-use, the pending state of the controller's
+ * interrupt at the irqchip and whether @compl has fired, then names the
+ * accelerators whose commands were issued but never completed.
+ *
+ * Returns early, without crashing, when no IRQ data exists for drv->irq.
+ * Otherwise ends in BUG_ON() if at least one TCS was in use.
+ */
+void rpmh_rsc_debug(struct rsc_drv *drv, struct completion *compl)
+{
+	struct irq_data *rsc_irq_data = irq_get_irq_data(drv->irq);
+	/* Stays false if the irqchip state cannot be read. */
+	bool irq_sts = false;
+	int i;
+	int ret;
+	int busy = 0;
+	unsigned long accl = 0;
+	char str[20] = "";
+
+	pr_warn("RSC:%s\n", drv->name);
+
+	for (i = 0; i < drv->num_tcs; i++) {
+		if (!test_bit(i, drv->tcs_in_use))
+			continue;
+		busy++;
+		print_tcs_info(drv, i, &accl);
+	}
+
+	if (!rsc_irq_data) {
+		pr_err("No IRQ data for RSC:%s\n", drv->name);
+		return;
+	}
+
+	ret = irq_get_irqchip_state(drv->irq, IRQCHIP_STATE_PENDING, &irq_sts);
+	if (ret)
+		pr_warn("HW IRQ %lu state at GIC is unknown (err=%d)\n",
+			rsc_irq_data->hwirq, ret);
+	else
+		pr_warn("HW IRQ %lu is %s at GIC\n", rsc_irq_data->hwirq,
+			irq_sts ? "PENDING" : "NOT PENDING");
+
+	/* completion_done() is true once complete() has been called. */
+	pr_warn("Completion is %s to finish\n",
+		completion_done(compl) ? "NOT PENDING" : "PENDING");
+
+	/* Build a space-separated list of the accelerators that did not respond. */
+	for_each_set_bit(i, &accl, ARRAY_SIZE(accl_str)) {
+		strlcat(str, accl_str[i], sizeof(str));
+		strlcat(str, " ", sizeof(str));
+	}
+
+	if (busy && !irq_sts)
+		pr_warn("ERROR:Accelerator(s) { %s } at AOSS did not respond\n",
+			str);
+	else if (irq_sts)
+		pr_warn("ERROR:Possible lockup in Linux\n");
+
+	/*
+	 * The TCS(s) are busy waiting, we have no way to recover from this.
+	 * If this debug function is called, we assume it's because timeout
+	 * has happened.
+	 * Crash and report.
+	 */
+	BUG_ON(busy);
+}
+
static int rpmh_probe_tcs_config(struct platform_device *pdev,
struct rsc_drv *drv)
{
@@ -657,6 +770,8 @@ static int rpmh_rsc_probe(struct platform_device *pdev)
if (irq < 0)
return irq;
+ drv->irq = irq;
+
ret = devm_request_irq(&pdev->dev, irq, tcs_tx_done,
IRQF_TRIGGER_HIGH | IRQF_NO_SUSPEND,
drv->name, drv);
@@ -671,6 +786,8 @@ static int rpmh_rsc_probe(struct platform_device *pdev)
INIT_LIST_HEAD(&drv->client.batch_cache);
dev_set_drvdata(&pdev->dev, drv);
+ if (__rsc_count < MAX_RSC_COUNT)
+ __rsc_drv[__rsc_count++] = drv;
return devm_of_platform_populate(&pdev->dev);
}
diff --git a/drivers/soc/qcom/rpmh.c b/drivers/soc/qcom/rpmh.c
index 035091f..4759856 100644
--- a/drivers/soc/qcom/rpmh.c
+++ b/drivers/soc/qcom/rpmh.c
@@ -263,6 +263,7 @@ int rpmh_write(const struct device *dev, enum rpmh_state state,
{
DECLARE_COMPLETION_ONSTACK(compl);
DEFINE_RPMH_MSG_ONSTACK(dev, state, &compl, rpm_msg);
+ struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
int ret;
if (!cmd || !n || n > MAX_RPMH_PAYLOAD)
@@ -276,8 +277,12 @@ int rpmh_write(const struct device *dev, enum rpmh_state state,
return ret;
ret = wait_for_completion_timeout(&compl, RPMH_TIMEOUT_MS);
- WARN_ON(!ret);
- return (ret > 0) ? 0 : -ETIMEDOUT;
+ if (!ret) {
+ rpmh_rsc_debug(ctrlr_to_drv(ctrlr), &compl);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
}
EXPORT_SYMBOL(rpmh_write);
@@ -407,7 +412,7 @@ int rpmh_write_batch(const struct device *dev, enum rpmh_state state,
* the completion that we're going to free once
* we've returned from this function.
*/
- WARN_ON(1);
+ rpmh_rsc_debug(ctrlr_to_drv(ctrlr), &compls[i]);
ret = -ETIMEDOUT;
goto exit;
}
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
From: Lina Iyer <[email protected]>
To debug issues when a TCS is busy, report the interrupt status as well. If
the interrupt line is pending at the GIC, then Linux was too busy to process
the interrupt; if it is not pending, then AOSS was too busy to handle the
request.
Signed-off-by: Lina Iyer <[email protected]>
Signed-off-by: Maulik Shah <[email protected]>
---
drivers/soc/qcom/rpmh-rsc.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
index 3595e4d..1dc05c3 100644
--- a/drivers/soc/qcom/rpmh-rsc.c
+++ b/drivers/soc/qcom/rpmh-rsc.c
@@ -421,8 +421,14 @@ int rpmh_rsc_send_data(struct rsc_drv *drv, const struct tcs_request *msg)
do {
ret = tcs_write(drv, msg);
if (ret == -EBUSY) {
- pr_info_ratelimited("DRV:%s TCS Busy, retrying RPMH message send: addr=%#x\n",
- drv->name, msg->cmds[0].addr);
+ bool irq_sts;
+
+ irq_get_irqchip_state(drv->irq, IRQCHIP_STATE_PENDING,
+ &irq_sts);
+ pr_info_ratelimited("DRV:%s TCS Busy, retrying RPMH message send: addr=%#x interrupt status=%s\n",
+ drv->name, msg->cmds[0].addr,
+ irq_sts ?
+ "PENDING" : "NOT PENDING");
udelay(10);
}
} while (ret == -EBUSY);
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation