The core part of this series is the update to the sysmon driver to ensure that
notifications sent to the remote processor are consistent and always present
valid state transitions.
In testing this I finally took the time to fix up the issue of the SMP2P based
graceful shutdown in the remoteproc drivers always timing out if sysmon has
already successfully shut down the remote processor.
Bjorn Andersson (4):
remoteproc: sysmon: Ensure remote notification ordering
remoteproc: sysmon: Expose the shutdown result
remoteproc: qcom: q6v5: Query sysmon before graceful shutdown
remoteproc: sysmon: Improve error messages
drivers/remoteproc/qcom_common.h | 6 ++
drivers/remoteproc/qcom_q6v5.c | 8 +-
drivers/remoteproc/qcom_q6v5.h | 3 +-
drivers/remoteproc/qcom_q6v5_adsp.c | 2 +-
drivers/remoteproc/qcom_q6v5_mss.c | 2 +-
drivers/remoteproc/qcom_q6v5_pas.c | 2 +-
drivers/remoteproc/qcom_q6v5_wcss.c | 2 +-
drivers/remoteproc/qcom_sysmon.c | 121 +++++++++++++++++++++-------
8 files changed, 109 insertions(+), 37 deletions(-)
--
2.28.0
A graceful shutdown of the Qualcomm remote processors where
traditionally performed by invoking a shared memory state signal and
waiting for the associated ack.
This was later superseded by the "sysmon" mechanism, where some form of
shared memory bus is used to send a "graceful shutdown request" message
and one of more signals comes back to indicate its success.
But when this newer mechanism is in effect the firmware is shut down by
the time the older mechanism, implemented in the remoteproc drivers,
attempts to perform a graceful shutdown - and as such it will never
receive an ack back.
This patch therefor track the success of the latest shutdown attempt in
sysmon and exposes a new function in the API that the remoteproc driver
can use to query the success and the necessity of invoking the older
mechanism.
Signed-off-by: Bjorn Andersson <[email protected]>
---
Changes since v1:
- New patch
drivers/remoteproc/qcom_common.h | 6 +++
drivers/remoteproc/qcom_sysmon.c | 82 ++++++++++++++++++++++++--------
2 files changed, 69 insertions(+), 19 deletions(-)
diff --git a/drivers/remoteproc/qcom_common.h b/drivers/remoteproc/qcom_common.h
index dfc641c3a98b..8ba9052955bd 100644
--- a/drivers/remoteproc/qcom_common.h
+++ b/drivers/remoteproc/qcom_common.h
@@ -51,6 +51,7 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
const char *name,
int ssctl_instance);
void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon);
+bool qcom_sysmon_shutdown_acked(struct qcom_sysmon *sysmon);
#else
static inline struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
const char *name,
@@ -62,6 +63,11 @@ static inline struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
static inline void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon)
{
}
+
+static inline bool qcom_sysmon_shutdown_acked(struct qcom_sysmon *sysmon)
+{
+ return false;
+}
#endif
#endif
diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c
index 38f63c968fa8..1c42f00010d3 100644
--- a/drivers/remoteproc/qcom_sysmon.c
+++ b/drivers/remoteproc/qcom_sysmon.c
@@ -44,6 +44,7 @@ struct qcom_sysmon {
struct mutex lock;
bool ssr_ack;
+ bool shutdown_acked;
struct qmi_handle qmi;
struct sockaddr_qrtr ssctl;
@@ -115,10 +116,13 @@ static void sysmon_send_event(struct qcom_sysmon *sysmon,
/**
* sysmon_request_shutdown() - request graceful shutdown of remote
* @sysmon: sysmon context
+ *
+ * Return: boolean indicator of the remote processor acking the request
*/
-static void sysmon_request_shutdown(struct qcom_sysmon *sysmon)
+static bool sysmon_request_shutdown(struct qcom_sysmon *sysmon)
{
char *req = "ssr:shutdown";
+ bool acked = false;
int ret;
mutex_lock(&sysmon->lock);
@@ -141,9 +145,13 @@ static void sysmon_request_shutdown(struct qcom_sysmon *sysmon)
if (!sysmon->ssr_ack)
dev_err(sysmon->dev,
"unexpected response to sysmon shutdown request\n");
+ else
+ acked = true;
out_unlock:
mutex_unlock(&sysmon->lock);
+
+ return acked;
}
static int sysmon_callback(struct rpmsg_device *rpdev, void *data, int count,
@@ -297,14 +305,33 @@ static struct qmi_msg_handler qmi_indication_handler[] = {
{}
};
+static bool ssctl_request_shutdown_wait(struct qcom_sysmon *sysmon)
+{
+ int ret;
+
+ ret = wait_for_completion_timeout(&sysmon->shutdown_comp, 10 * HZ);
+ if (ret)
+ return true;
+
+ ret = try_wait_for_completion(&sysmon->ind_comp);
+ if (ret)
+ return true;
+
+ dev_err(sysmon->dev, "timeout waiting for shutdown ack\n");
+ return false;
+}
+
/**
* ssctl_request_shutdown() - request shutdown via SSCTL QMI service
* @sysmon: sysmon context
+ *
+ * Return: boolean indicator of the remote processor acking the request
*/
-static void ssctl_request_shutdown(struct qcom_sysmon *sysmon)
+static bool ssctl_request_shutdown(struct qcom_sysmon *sysmon)
{
struct ssctl_shutdown_resp resp;
struct qmi_txn txn;
+ bool acked = false;
int ret;
reinit_completion(&sysmon->ind_comp);
@@ -312,7 +339,7 @@ static void ssctl_request_shutdown(struct qcom_sysmon *sysmon)
ret = qmi_txn_init(&sysmon->qmi, &txn, ssctl_shutdown_resp_ei, &resp);
if (ret < 0) {
dev_err(sysmon->dev, "failed to allocate QMI txn\n");
- return;
+ return false;
}
ret = qmi_send_request(&sysmon->qmi, &sysmon->ssctl, &txn,
@@ -320,27 +347,23 @@ static void ssctl_request_shutdown(struct qcom_sysmon *sysmon)
if (ret < 0) {
dev_err(sysmon->dev, "failed to send shutdown request\n");
qmi_txn_cancel(&txn);
- return;
+ return false;
}
ret = qmi_txn_wait(&txn, 5 * HZ);
- if (ret < 0)
+ if (ret < 0) {
dev_err(sysmon->dev, "failed receiving QMI response\n");
- else if (resp.resp.result)
+ } else if (resp.resp.result) {
dev_err(sysmon->dev, "shutdown request failed\n");
- else
+ } else {
dev_dbg(sysmon->dev, "shutdown request completed\n");
-
- if (sysmon->shutdown_irq > 0) {
- ret = wait_for_completion_timeout(&sysmon->shutdown_comp,
- 10 * HZ);
- if (!ret) {
- ret = try_wait_for_completion(&sysmon->ind_comp);
- if (!ret)
- dev_err(sysmon->dev,
- "timeout waiting for shutdown ack\n");
- }
+ acked = true;
}
+
+ if (sysmon->shutdown_irq > 0)
+ return ssctl_request_shutdown_wait(sysmon);
+
+ return acked;
}
/**
@@ -508,6 +531,9 @@ static void sysmon_stop(struct rproc_subdev *subdev, bool crashed)
.subsys_name = sysmon->name,
.ssr_event = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN
};
+ bool acked;
+
+ sysmon->shutdown_acked = false;
mutex_lock(&sysmon->state_lock);
sysmon->state = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN;
@@ -519,9 +545,11 @@ static void sysmon_stop(struct rproc_subdev *subdev, bool crashed)
return;
if (sysmon->ssctl_version)
- ssctl_request_shutdown(sysmon);
+ acked = ssctl_request_shutdown(sysmon);
else if (sysmon->ept)
- sysmon_request_shutdown(sysmon);
+ acked = sysmon_request_shutdown(sysmon);
+
+ sysmon->shutdown_acked = acked;
}
static void sysmon_unprepare(struct rproc_subdev *subdev)
@@ -679,6 +707,22 @@ void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon)
}
EXPORT_SYMBOL_GPL(qcom_remove_sysmon_subdev);
+/**
+ * qcom_sysmon_shutdown_acked() - query the success of the last shutdown
+ * @sysmon: sysmon context
+ *
+ * When sysmon is used to request a graceful shutdown of the remote processor
+ * this can be used by the remoteproc driver to query the success, in order to
+ * know if it should fall back to other means of requesting a shutdown.
+ *
+ * Return: boolean indicator of the success of the last shutdown request
+ */
+bool qcom_sysmon_shutdown_acked(struct qcom_sysmon *sysmon)
+{
+ return sysmon && sysmon->shutdown_acked;
+}
+EXPORT_SYMBOL_GPL(qcom_sysmon_shutdown_acked);
+
/**
* sysmon_probe() - probe sys_mon channel
* @rpdev: rpmsg device handle
--
2.28.0
The reliance on the remoteproc's state for determining when to send
sysmon notifications to a remote processor is racy with regard to
concurrent remoteproc operations.
Further more the advertisement of the state of other remote processor to
a newly started remote processor might not only send the wrong state,
but might result in a stream of state changes that are out of order.
Address this by introducing state tracking within the sysmon instances
themselves and extend the locking to ensure that the notifications are
consistent with this state.
Fixes: 1f36ab3f6e3b ("remoteproc: sysmon: Inform current rproc about all active rprocs")
Fixes: 1877f54f75ad ("remoteproc: sysmon: Add notifications for events")
Fixes: 1fb82ee806d1 ("remoteproc: qcom: Introduce sysmon")
Cc: [email protected]
Signed-off-by: Bjorn Andersson <[email protected]>
---
Changes since v1:
- Reduced the locking to be per sysmon instance
- Dropped unused local "rproc" variable in sysmon_notify()
drivers/remoteproc/qcom_sysmon.c | 27 +++++++++++++++++++++------
1 file changed, 21 insertions(+), 6 deletions(-)
diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c
index 9eb2f6bccea6..38f63c968fa8 100644
--- a/drivers/remoteproc/qcom_sysmon.c
+++ b/drivers/remoteproc/qcom_sysmon.c
@@ -22,6 +22,9 @@ struct qcom_sysmon {
struct rproc_subdev subdev;
struct rproc *rproc;
+ int state;
+ struct mutex state_lock;
+
struct list_head node;
const char *name;
@@ -448,7 +451,10 @@ static int sysmon_prepare(struct rproc_subdev *subdev)
.ssr_event = SSCTL_SSR_EVENT_BEFORE_POWERUP
};
+ mutex_lock(&sysmon->state_lock);
+ sysmon->state = SSCTL_SSR_EVENT_BEFORE_POWERUP;
blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
+ mutex_unlock(&sysmon->state_lock);
return 0;
}
@@ -472,22 +478,25 @@ static int sysmon_start(struct rproc_subdev *subdev)
.ssr_event = SSCTL_SSR_EVENT_AFTER_POWERUP
};
+ mutex_lock(&sysmon->state_lock);
+ sysmon->state = SSCTL_SSR_EVENT_AFTER_POWERUP;
blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
+ mutex_unlock(&sysmon->state_lock);
- mutex_lock(&sysmon_lock);
list_for_each_entry(target, &sysmon_list, node) {
- if (target == sysmon ||
- target->rproc->state != RPROC_RUNNING)
+ if (target == sysmon)
continue;
+ mutex_lock(&target->state_lock);
event.subsys_name = target->name;
+ event.ssr_event = target->state;
if (sysmon->ssctl_version == 2)
ssctl_send_event(sysmon, &event);
else if (sysmon->ept)
sysmon_send_event(sysmon, &event);
+ mutex_unlock(&target->state_lock);
}
- mutex_unlock(&sysmon_lock);
return 0;
}
@@ -500,7 +509,10 @@ static void sysmon_stop(struct rproc_subdev *subdev, bool crashed)
.ssr_event = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN
};
+ mutex_lock(&sysmon->state_lock);
+ sysmon->state = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN;
blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
+ mutex_unlock(&sysmon->state_lock);
/* Don't request graceful shutdown if we've crashed */
if (crashed)
@@ -521,7 +533,10 @@ static void sysmon_unprepare(struct rproc_subdev *subdev)
.ssr_event = SSCTL_SSR_EVENT_AFTER_SHUTDOWN
};
+ mutex_lock(&sysmon->state_lock);
+ sysmon->state = SSCTL_SSR_EVENT_AFTER_SHUTDOWN;
blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
+ mutex_unlock(&sysmon->state_lock);
}
/**
@@ -534,11 +549,10 @@ static int sysmon_notify(struct notifier_block *nb, unsigned long event,
void *data)
{
struct qcom_sysmon *sysmon = container_of(nb, struct qcom_sysmon, nb);
- struct rproc *rproc = sysmon->rproc;
struct sysmon_event *sysmon_event = data;
/* Skip non-running rprocs and the originating instance */
- if (rproc->state != RPROC_RUNNING ||
+ if (sysmon->state != SSCTL_SSR_EVENT_AFTER_POWERUP ||
!strcmp(sysmon_event->subsys_name, sysmon->name)) {
dev_dbg(sysmon->dev, "not notifying %s\n", sysmon->name);
return NOTIFY_DONE;
@@ -591,6 +605,7 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
init_completion(&sysmon->ind_comp);
init_completion(&sysmon->shutdown_comp);
mutex_init(&sysmon->lock);
+ mutex_init(&sysmon->state_lock);
sysmon->shutdown_irq = of_irq_get_byname(sysmon->dev->of_node,
"shutdown-ack");
--
2.28.0
Improve the style of a few of the error messages printed by the sysmon
implementation and fix the copy-pasted shutdown error in the send-event
function.
Signed-off-by: Bjorn Andersson <[email protected]>
---
Changes since v1:
- New patch
drivers/remoteproc/qcom_sysmon.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c
index 1c42f00010d3..47683932512a 100644
--- a/drivers/remoteproc/qcom_sysmon.c
+++ b/drivers/remoteproc/qcom_sysmon.c
@@ -352,9 +352,9 @@ static bool ssctl_request_shutdown(struct qcom_sysmon *sysmon)
ret = qmi_txn_wait(&txn, 5 * HZ);
if (ret < 0) {
- dev_err(sysmon->dev, "failed receiving QMI response\n");
+ dev_err(sysmon->dev, "timeout waiting for shutdown response\n");
} else if (resp.resp.result) {
- dev_err(sysmon->dev, "shutdown request failed\n");
+ dev_err(sysmon->dev, "shutdown request rejected\n");
} else {
dev_dbg(sysmon->dev, "shutdown request completed\n");
acked = true;
@@ -397,18 +397,18 @@ static void ssctl_send_event(struct qcom_sysmon *sysmon,
SSCTL_SUBSYS_EVENT_REQ, 40,
ssctl_subsys_event_req_ei, &req);
if (ret < 0) {
- dev_err(sysmon->dev, "failed to send shutdown request\n");
+ dev_err(sysmon->dev, "failed to send subsystem event\n");
qmi_txn_cancel(&txn);
return;
}
ret = qmi_txn_wait(&txn, 5 * HZ);
if (ret < 0)
- dev_err(sysmon->dev, "failed receiving QMI response\n");
+ dev_err(sysmon->dev, "timeout waiting for subsystem event response\n");
else if (resp.resp.result)
- dev_err(sysmon->dev, "ssr event send failed\n");
+ dev_err(sysmon->dev, "subsystem event rejected\n");
else
- dev_dbg(sysmon->dev, "ssr event send completed\n");
+ dev_dbg(sysmon->dev, "subsystem event accepted\n");
}
/**
--
2.28.0
On 2020-11-04 20:50, Bjorn Andersson wrote:
> The reliance on the remoteproc's state for determining when to send
> sysmon notifications to a remote processor is racy with regard to
> concurrent remoteproc operations.
>
> Further more the advertisement of the state of other remote processor
> to
> a newly started remote processor might not only send the wrong state,
> but might result in a stream of state changes that are out of order.
>
> Address this by introducing state tracking within the sysmon instances
> themselves and extend the locking to ensure that the notifications are
> consistent with this state.
>
> Fixes: 1f36ab3f6e3b ("remoteproc: sysmon: Inform current rproc about
> all active rprocs")
> Fixes: 1877f54f75ad ("remoteproc: sysmon: Add notifications for
> events")
> Fixes: 1fb82ee806d1 ("remoteproc: qcom: Introduce sysmon")
> Cc: [email protected]
> Signed-off-by: Bjorn Andersson <[email protected]>
> ---
>
> Changes since v1:
> - Reduced the locking to be per sysmon instance
> - Dropped unused local "rproc" variable in sysmon_notify()
>
> drivers/remoteproc/qcom_sysmon.c | 27 +++++++++++++++++++++------
> 1 file changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/remoteproc/qcom_sysmon.c
> b/drivers/remoteproc/qcom_sysmon.c
> index 9eb2f6bccea6..38f63c968fa8 100644
> --- a/drivers/remoteproc/qcom_sysmon.c
> +++ b/drivers/remoteproc/qcom_sysmon.c
> @@ -22,6 +22,9 @@ struct qcom_sysmon {
> struct rproc_subdev subdev;
> struct rproc *rproc;
>
> + int state;
> + struct mutex state_lock;
> +
> struct list_head node;
>
> const char *name;
> @@ -448,7 +451,10 @@ static int sysmon_prepare(struct rproc_subdev
> *subdev)
> .ssr_event = SSCTL_SSR_EVENT_BEFORE_POWERUP
> };
>
> + mutex_lock(&sysmon->state_lock);
> + sysmon->state = SSCTL_SSR_EVENT_BEFORE_POWERUP;
> blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
> + mutex_unlock(&sysmon->state_lock);
>
> return 0;
> }
> @@ -472,22 +478,25 @@ static int sysmon_start(struct rproc_subdev
> *subdev)
> .ssr_event = SSCTL_SSR_EVENT_AFTER_POWERUP
> };
>
> + mutex_lock(&sysmon->state_lock);
> + sysmon->state = SSCTL_SSR_EVENT_AFTER_POWERUP;
> blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
> + mutex_unlock(&sysmon->state_lock);
>
> - mutex_lock(&sysmon_lock);
We should keep the sysmon_lock to make sure sysmon_list is not modified
at the time we are doing this operation?
> list_for_each_entry(target, &sysmon_list, node) {
> - if (target == sysmon ||
> - target->rproc->state != RPROC_RUNNING)
> + if (target == sysmon)
> continue;
>
> + mutex_lock(&target->state_lock);
> event.subsys_name = target->name;
> + event.ssr_event = target->state;
Is it better to only send this event when target->state is
"SSCTL_SSR_EVENT_AFTER_POWERUP"?
>
> if (sysmon->ssctl_version == 2)
> ssctl_send_event(sysmon, &event);
> else if (sysmon->ept)
> sysmon_send_event(sysmon, &event);
> + mutex_unlock(&target->state_lock);
> }
> - mutex_unlock(&sysmon_lock);
>
> return 0;
> }
> @@ -500,7 +509,10 @@ static void sysmon_stop(struct rproc_subdev
> *subdev, bool crashed)
> .ssr_event = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN
> };
>
> + mutex_lock(&sysmon->state_lock);
> + sysmon->state = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN;
> blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
> + mutex_unlock(&sysmon->state_lock);
>
> /* Don't request graceful shutdown if we've crashed */
> if (crashed)
> @@ -521,7 +533,10 @@ static void sysmon_unprepare(struct rproc_subdev
> *subdev)
> .ssr_event = SSCTL_SSR_EVENT_AFTER_SHUTDOWN
> };
>
> + mutex_lock(&sysmon->state_lock);
> + sysmon->state = SSCTL_SSR_EVENT_AFTER_SHUTDOWN;
> blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
> + mutex_unlock(&sysmon->state_lock);
> }
>
> /**
> @@ -534,11 +549,10 @@ static int sysmon_notify(struct notifier_block
> *nb, unsigned long event,
> void *data)
> {
> struct qcom_sysmon *sysmon = container_of(nb, struct qcom_sysmon,
> nb);
> - struct rproc *rproc = sysmon->rproc;
> struct sysmon_event *sysmon_event = data;
>
> /* Skip non-running rprocs and the originating instance */
> - if (rproc->state != RPROC_RUNNING ||
> + if (sysmon->state != SSCTL_SSR_EVENT_AFTER_POWERUP ||
> !strcmp(sysmon_event->subsys_name, sysmon->name)) {
> dev_dbg(sysmon->dev, "not notifying %s\n", sysmon->name);
> return NOTIFY_DONE;
> @@ -591,6 +605,7 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct
> rproc *rproc,
> init_completion(&sysmon->ind_comp);
> init_completion(&sysmon->shutdown_comp);
> mutex_init(&sysmon->lock);
> + mutex_init(&sysmon->state_lock);
>
> sysmon->shutdown_irq = of_irq_get_byname(sysmon->dev->of_node,
> "shutdown-ack");
On 2020-11-04 20:50, Bjorn Andersson wrote:
> Improve the style of a few of the error messages printed by the sysmon
> implementation and fix the copy-pasted shutdown error in the send-event
> function.
>
> Signed-off-by: Bjorn Andersson <[email protected]>
> ---
>
> Changes since v1:
> - New patch
>
> drivers/remoteproc/qcom_sysmon.c | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/remoteproc/qcom_sysmon.c
> b/drivers/remoteproc/qcom_sysmon.c
> index 1c42f00010d3..47683932512a 100644
> --- a/drivers/remoteproc/qcom_sysmon.c
> +++ b/drivers/remoteproc/qcom_sysmon.c
> @@ -352,9 +352,9 @@ static bool ssctl_request_shutdown(struct
> qcom_sysmon *sysmon)
>
> ret = qmi_txn_wait(&txn, 5 * HZ);
> if (ret < 0) {
> - dev_err(sysmon->dev, "failed receiving QMI response\n");
> + dev_err(sysmon->dev, "timeout waiting for shutdown response\n");
> } else if (resp.resp.result) {
> - dev_err(sysmon->dev, "shutdown request failed\n");
> + dev_err(sysmon->dev, "shutdown request rejected\n");
> } else {
> dev_dbg(sysmon->dev, "shutdown request completed\n");
> acked = true;
> @@ -397,18 +397,18 @@ static void ssctl_send_event(struct qcom_sysmon
> *sysmon,
> SSCTL_SUBSYS_EVENT_REQ, 40,
> ssctl_subsys_event_req_ei, &req);
> if (ret < 0) {
> - dev_err(sysmon->dev, "failed to send shutdown request\n");
> + dev_err(sysmon->dev, "failed to send subsystem event\n");
> qmi_txn_cancel(&txn);
> return;
> }
>
> ret = qmi_txn_wait(&txn, 5 * HZ);
> if (ret < 0)
> - dev_err(sysmon->dev, "failed receiving QMI response\n");
> + dev_err(sysmon->dev, "timeout waiting for subsystem event
> response\n");
> else if (resp.resp.result)
> - dev_err(sysmon->dev, "ssr event send failed\n");
> + dev_err(sysmon->dev, "subsystem event rejected\n");
> else
> - dev_dbg(sysmon->dev, "ssr event send completed\n");
> + dev_dbg(sysmon->dev, "subsystem event accepted\n");
> }
>
> /**
Reviewed-by: Rishabh Bhatnagar <[email protected]>
On 2020-11-04 20:50, Bjorn Andersson wrote:
> A graceful shutdown of the Qualcomm remote processors where
> traditionally performed by invoking a shared memory state signal and
> waiting for the associated ack.
>
> This was later superseded by the "sysmon" mechanism, where some form of
> shared memory bus is used to send a "graceful shutdown request" message
> and one of more signals comes back to indicate its success.
>
> But when this newer mechanism is in effect the firmware is shut down by
> the time the older mechanism, implemented in the remoteproc drivers,
> attempts to perform a graceful shutdown - and as such it will never
> receive an ack back.
>
> This patch therefor track the success of the latest shutdown attempt in
> sysmon and exposes a new function in the API that the remoteproc driver
> can use to query the success and the necessity of invoking the older
> mechanism.
>
> Signed-off-by: Bjorn Andersson <[email protected]>
> ---
>
> Changes since v1:
> - New patch
>
> drivers/remoteproc/qcom_common.h | 6 +++
> drivers/remoteproc/qcom_sysmon.c | 82 ++++++++++++++++++++++++--------
> 2 files changed, 69 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/remoteproc/qcom_common.h
> b/drivers/remoteproc/qcom_common.h
> index dfc641c3a98b..8ba9052955bd 100644
> --- a/drivers/remoteproc/qcom_common.h
> +++ b/drivers/remoteproc/qcom_common.h
> @@ -51,6 +51,7 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct
> rproc *rproc,
> const char *name,
> int ssctl_instance);
> void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon);
> +bool qcom_sysmon_shutdown_acked(struct qcom_sysmon *sysmon);
> #else
> static inline struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc
> *rproc,
> const char *name,
> @@ -62,6 +63,11 @@ static inline struct qcom_sysmon
> *qcom_add_sysmon_subdev(struct rproc *rproc,
> static inline void qcom_remove_sysmon_subdev(struct qcom_sysmon
> *sysmon)
> {
> }
> +
> +static inline bool qcom_sysmon_shutdown_acked(struct qcom_sysmon
> *sysmon)
> +{
> + return false;
> +}
> #endif
>
> #endif
> diff --git a/drivers/remoteproc/qcom_sysmon.c
> b/drivers/remoteproc/qcom_sysmon.c
> index 38f63c968fa8..1c42f00010d3 100644
> --- a/drivers/remoteproc/qcom_sysmon.c
> +++ b/drivers/remoteproc/qcom_sysmon.c
> @@ -44,6 +44,7 @@ struct qcom_sysmon {
> struct mutex lock;
>
> bool ssr_ack;
> + bool shutdown_acked;
>
> struct qmi_handle qmi;
> struct sockaddr_qrtr ssctl;
> @@ -115,10 +116,13 @@ static void sysmon_send_event(struct qcom_sysmon
> *sysmon,
> /**
> * sysmon_request_shutdown() - request graceful shutdown of remote
> * @sysmon: sysmon context
> + *
> + * Return: boolean indicator of the remote processor acking the
> request
> */
> -static void sysmon_request_shutdown(struct qcom_sysmon *sysmon)
> +static bool sysmon_request_shutdown(struct qcom_sysmon *sysmon)
> {
> char *req = "ssr:shutdown";
> + bool acked = false;
> int ret;
>
> mutex_lock(&sysmon->lock);
> @@ -141,9 +145,13 @@ static void sysmon_request_shutdown(struct
> qcom_sysmon *sysmon)
> if (!sysmon->ssr_ack)
> dev_err(sysmon->dev,
> "unexpected response to sysmon shutdown request\n");
> + else
> + acked = true;
>
> out_unlock:
> mutex_unlock(&sysmon->lock);
> +
> + return acked;
> }
>
> static int sysmon_callback(struct rpmsg_device *rpdev, void *data, int
> count,
> @@ -297,14 +305,33 @@ static struct qmi_msg_handler
> qmi_indication_handler[] = {
> {}
> };
>
> +static bool ssctl_request_shutdown_wait(struct qcom_sysmon *sysmon)
> +{
> + int ret;
> +
> + ret = wait_for_completion_timeout(&sysmon->shutdown_comp, 10 * HZ);
> + if (ret)
> + return true;
> +
> + ret = try_wait_for_completion(&sysmon->ind_comp);
> + if (ret)
> + return true;
> +
> + dev_err(sysmon->dev, "timeout waiting for shutdown ack\n");
> + return false;
> +}
> +
> /**
> * ssctl_request_shutdown() - request shutdown via SSCTL QMI service
> * @sysmon: sysmon context
> + *
> + * Return: boolean indicator of the remote processor acking the
> request
> */
> -static void ssctl_request_shutdown(struct qcom_sysmon *sysmon)
> +static bool ssctl_request_shutdown(struct qcom_sysmon *sysmon)
> {
> struct ssctl_shutdown_resp resp;
> struct qmi_txn txn;
> + bool acked = false;
> int ret;
>
> reinit_completion(&sysmon->ind_comp);
> @@ -312,7 +339,7 @@ static void ssctl_request_shutdown(struct
> qcom_sysmon *sysmon)
> ret = qmi_txn_init(&sysmon->qmi, &txn, ssctl_shutdown_resp_ei,
> &resp);
> if (ret < 0) {
> dev_err(sysmon->dev, "failed to allocate QMI txn\n");
> - return;
> + return false;
> }
>
> ret = qmi_send_request(&sysmon->qmi, &sysmon->ssctl, &txn,
> @@ -320,27 +347,23 @@ static void ssctl_request_shutdown(struct
> qcom_sysmon *sysmon)
> if (ret < 0) {
> dev_err(sysmon->dev, "failed to send shutdown request\n");
> qmi_txn_cancel(&txn);
> - return;
> + return false;
> }
>
> ret = qmi_txn_wait(&txn, 5 * HZ);
> - if (ret < 0)
> + if (ret < 0) {
> dev_err(sysmon->dev, "failed receiving QMI response\n");
> - else if (resp.resp.result)
> + } else if (resp.resp.result) {
> dev_err(sysmon->dev, "shutdown request failed\n");
> - else
> + } else {
> dev_dbg(sysmon->dev, "shutdown request completed\n");
> -
> - if (sysmon->shutdown_irq > 0) {
> - ret = wait_for_completion_timeout(&sysmon->shutdown_comp,
> - 10 * HZ);
> - if (!ret) {
> - ret = try_wait_for_completion(&sysmon->ind_comp);
> - if (!ret)
> - dev_err(sysmon->dev,
> - "timeout waiting for shutdown ack\n");
> - }
> + acked = true;
> }
> +
> + if (sysmon->shutdown_irq > 0)
> + return ssctl_request_shutdown_wait(sysmon);
> +
> + return acked;
> }
>
> /**
> @@ -508,6 +531,9 @@ static void sysmon_stop(struct rproc_subdev
> *subdev, bool crashed)
> .subsys_name = sysmon->name,
> .ssr_event = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN
> };
> + bool acked;
> +
> + sysmon->shutdown_acked = false;
>
> mutex_lock(&sysmon->state_lock);
> sysmon->state = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN;
> @@ -519,9 +545,11 @@ static void sysmon_stop(struct rproc_subdev
> *subdev, bool crashed)
> return;
>
> if (sysmon->ssctl_version)
> - ssctl_request_shutdown(sysmon);
> + acked = ssctl_request_shutdown(sysmon);
> else if (sysmon->ept)
> - sysmon_request_shutdown(sysmon);
> + acked = sysmon_request_shutdown(sysmon);
> +
> + sysmon->shutdown_acked = acked;
> }
>
> static void sysmon_unprepare(struct rproc_subdev *subdev)
> @@ -679,6 +707,22 @@ void qcom_remove_sysmon_subdev(struct qcom_sysmon
> *sysmon)
> }
> EXPORT_SYMBOL_GPL(qcom_remove_sysmon_subdev);
>
> +/**
> + * qcom_sysmon_shutdown_acked() - query the success of the last
> shutdown
> + * @sysmon: sysmon context
> + *
> + * When sysmon is used to request a graceful shutdown of the remote
> processor
> + * this can be used by the remoteproc driver to query the success, in
> order to
> + * know if it should fall back to other means of requesting a
> shutdown.
> + *
> + * Return: boolean indicator of the success of the last shutdown
> request
> + */
> +bool qcom_sysmon_shutdown_acked(struct qcom_sysmon *sysmon)
> +{
> + return sysmon && sysmon->shutdown_acked;
> +}
> +EXPORT_SYMBOL_GPL(qcom_sysmon_shutdown_acked);
> +
> /**
> * sysmon_probe() - probe sys_mon channel
> * @rpdev: rpmsg device handle
Reviewed-by: Rishabh Bhatnagar <[email protected]>
On Tue 10 Nov 18:57 CST 2020, [email protected] wrote:
> On 2020-11-04 20:50, Bjorn Andersson wrote:
> > The reliance on the remoteproc's state for determining when to send
> > sysmon notifications to a remote processor is racy with regard to
> > concurrent remoteproc operations.
> >
> > Further more the advertisement of the state of other remote processor to
> > a newly started remote processor might not only send the wrong state,
> > but might result in a stream of state changes that are out of order.
> >
> > Address this by introducing state tracking within the sysmon instances
> > themselves and extend the locking to ensure that the notifications are
> > consistent with this state.
> >
> > Fixes: 1f36ab3f6e3b ("remoteproc: sysmon: Inform current rproc about
> > all active rprocs")
> > Fixes: 1877f54f75ad ("remoteproc: sysmon: Add notifications for events")
> > Fixes: 1fb82ee806d1 ("remoteproc: qcom: Introduce sysmon")
> > Cc: [email protected]
> > Signed-off-by: Bjorn Andersson <[email protected]>
> > ---
> >
> > Changes since v1:
> > - Reduced the locking to be per sysmon instance
> > - Dropped unused local "rproc" variable in sysmon_notify()
> >
> > drivers/remoteproc/qcom_sysmon.c | 27 +++++++++++++++++++++------
> > 1 file changed, 21 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/remoteproc/qcom_sysmon.c
> > b/drivers/remoteproc/qcom_sysmon.c
> > index 9eb2f6bccea6..38f63c968fa8 100644
> > --- a/drivers/remoteproc/qcom_sysmon.c
> > +++ b/drivers/remoteproc/qcom_sysmon.c
> > @@ -22,6 +22,9 @@ struct qcom_sysmon {
> > struct rproc_subdev subdev;
> > struct rproc *rproc;
> >
> > + int state;
> > + struct mutex state_lock;
> > +
> > struct list_head node;
> >
> > const char *name;
> > @@ -448,7 +451,10 @@ static int sysmon_prepare(struct rproc_subdev
> > *subdev)
> > .ssr_event = SSCTL_SSR_EVENT_BEFORE_POWERUP
> > };
> >
> > + mutex_lock(&sysmon->state_lock);
> > + sysmon->state = SSCTL_SSR_EVENT_BEFORE_POWERUP;
> > blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
> > + mutex_unlock(&sysmon->state_lock);
> >
> > return 0;
> > }
> > @@ -472,22 +478,25 @@ static int sysmon_start(struct rproc_subdev
> > *subdev)
> > .ssr_event = SSCTL_SSR_EVENT_AFTER_POWERUP
> > };
> >
> > + mutex_lock(&sysmon->state_lock);
> > + sysmon->state = SSCTL_SSR_EVENT_AFTER_POWERUP;
> > blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
> > + mutex_unlock(&sysmon->state_lock);
> >
> > - mutex_lock(&sysmon_lock);
>
> We should keep the sysmon_lock to make sure sysmon_list is not modified
> at the time we are doing this operation?
Yes, that seems like a very good idea. I will review and update.
> > list_for_each_entry(target, &sysmon_list, node) {
> > - if (target == sysmon ||
> > - target->rproc->state != RPROC_RUNNING)
> > + if (target == sysmon)
> > continue;
> >
> > + mutex_lock(&target->state_lock);
> > event.subsys_name = target->name;
> > + event.ssr_event = target->state;
>
> Is it better to only send this event when target->state is
> "SSCTL_SSR_EVENT_AFTER_POWERUP"?
It depends on what the remote's requirements, I tested this and didn't
see any problems sending both SSCTL_SSR_EVENT_AFTER_POWERUP and
SSCTL_SSR_EVENT_AFTER_SHUTDOWN at least...
I don't know if I managed to hit a case where I sent any of the
intermediate states.
If you could provide some more input here I would appreciate it -
although I would be happy to merge the patch after fixing above locking
issue and then we can limit the events sent once we have a more detailed
answer, if that helps.
Regards,
Bjorn
On 11/4/20 10:50 PM, Bjorn Andersson wrote:
> The core part of this series is the update to the sysmon driver to ensure that
> notifications sent to the remote processor are consistent and always present
> valid state transitions.
>
> In testing this I finally took the time to fix up the issue of the SMP2P based
> graceful shutdown in the remoteproc drivers always timing out if sysmon has
> already successfully shut down the remote processor.
>
> Bjorn Andersson (4):
> remoteproc: sysmon: Ensure remote notification ordering
> remoteproc: sysmon: Expose the shutdown result
> remoteproc: qcom: q6v5: Query sysmon before graceful shutdown
> remoteproc: sysmon: Improve error messages
>
> drivers/remoteproc/qcom_common.h | 6 ++
> drivers/remoteproc/qcom_q6v5.c | 8 +-
> drivers/remoteproc/qcom_q6v5.h | 3 +-
> drivers/remoteproc/qcom_q6v5_adsp.c | 2 +-
> drivers/remoteproc/qcom_q6v5_mss.c | 2 +-
> drivers/remoteproc/qcom_q6v5_pas.c | 2 +-
> drivers/remoteproc/qcom_q6v5_wcss.c | 2 +-
> drivers/remoteproc/qcom_sysmon.c | 121 +++++++++++++++++++++-------
> 8 files changed, 109 insertions(+), 37 deletions(-)
>
Entire series tested on Lenovo Yoga C630
Tested-by: Steev Klimaszewski <[email protected]>