From: Richard Maina <[email protected]>
When a remoteproc goes down unexpectedly, any hwspinlocks it had
acquired remain locked, possibly resulting in deadlock. To ensure all
such locks are freed, include a call to hwspin_lock_bust() during
remoteproc shutdown.
For qcom_q6v5_pas remoteprocs, each remoteproc has an assigned id that
is used to take the hwspinlock. Remoteproc should use this id to try and
bust the lock on remoteproc stop.
This edge case only occurs with q6v5_pas watchdog crashes. The error
fatal case has handling to clear the hwspinlock before the error fatal
interrupt is triggered.
Signed-off-by: Richard Maina <[email protected]>
Signed-off-by: Chris Lew <[email protected]>
---
drivers/remoteproc/qcom_q6v5_pas.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c
index 54d8005d40a3..57178fcb9aa3 100644
--- a/drivers/remoteproc/qcom_q6v5_pas.c
+++ b/drivers/remoteproc/qcom_q6v5_pas.c
@@ -10,6 +10,7 @@
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/firmware.h>
+#include <linux/hwspinlock.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -52,6 +53,7 @@ struct adsp_data {
const char *ssr_name;
const char *sysmon_name;
int ssctl_id;
+ int hwlock_id;
int region_assign_idx;
int region_assign_count;
@@ -84,6 +86,9 @@ struct qcom_adsp {
bool decrypt_shutdown;
const char *info_name;
+ struct hwspinlock *hwlock;
+ int hwlock_id;
+
const struct firmware *firmware;
const struct firmware *dtb_firmware;
@@ -399,6 +404,12 @@ static int adsp_stop(struct rproc *rproc)
if (handover)
qcom_pas_handover(&adsp->q6v5);
+ if (adsp->hwlock) {
+ ret = hwspin_lock_bust(adsp->hwlock, adsp->hwlock_id);
+ if (ret)
+ dev_info(adsp->dev, "failed to bust hwspinlock\n");
+ }
+
return ret;
}
@@ -684,6 +695,7 @@ static int adsp_probe(struct platform_device *pdev)
struct rproc *rproc;
const char *fw_name, *dtb_fw_name = NULL;
const struct rproc_ops *ops = &adsp_ops;
+ int hwlock_idx;
int ret;
desc = of_device_get_match_data(&pdev->dev);
@@ -736,6 +748,17 @@ static int adsp_probe(struct platform_device *pdev)
adsp->dtb_firmware_name = dtb_fw_name;
adsp->dtb_pas_id = desc->dtb_pas_id;
}
+
+ if (desc->hwlock_id) {
+ adsp->hwlock_id = desc->hwlock_id;
+ hwlock_idx = of_hwspin_lock_get_id(pdev->dev.of_node, 0);
+ if (hwlock_idx >= 0) {
+ adsp->hwlock = hwspin_lock_request_specific(hwlock_idx);
+ if (!adsp->hwlock)
+ dev_err(&pdev->dev, "failed to request hwspinlock\n");
+ }
+ }
+
platform_set_drvdata(pdev, adsp);
ret = device_init_wakeup(adsp->dev, true);
@@ -1196,6 +1219,7 @@ static const struct adsp_data sm8550_adsp_resource = {
.ssr_name = "lpass",
.sysmon_name = "adsp",
.ssctl_id = 0x14,
+ .hwlock_id = 3,
};
static const struct adsp_data sm8550_cdsp_resource = {
@@ -1216,6 +1240,7 @@ static const struct adsp_data sm8550_cdsp_resource = {
.ssr_name = "cdsp",
.sysmon_name = "cdsp",
.ssctl_id = 0x17,
+ .hwlock_id = 6,
};
static const struct adsp_data sm8550_mpss_resource = {
@@ -1236,6 +1261,7 @@ static const struct adsp_data sm8550_mpss_resource = {
.ssr_name = "mpss",
.sysmon_name = "modem",
.ssctl_id = 0x12,
+ .hwlock_id = 2,
.region_assign_idx = 2,
.region_assign_count = 1,
.region_assign_vmid = QCOM_SCM_VMID_MSS_MSA,
@@ -1275,6 +1301,7 @@ static const struct adsp_data sm8650_cdsp_resource = {
.ssr_name = "cdsp",
.sysmon_name = "cdsp",
.ssctl_id = 0x17,
+ .hwlock_id = 6,
.region_assign_idx = 2,
.region_assign_count = 1,
.region_assign_shared = true,
@@ -1299,6 +1326,7 @@ static const struct adsp_data sm8650_mpss_resource = {
.ssr_name = "mpss",
.sysmon_name = "modem",
.ssctl_id = 0x12,
+ .hwlock_id = 2,
.region_assign_idx = 2,
.region_assign_count = 3,
.region_assign_vmid = QCOM_SCM_VMID_MSS_MSA,
--
2.25.1
On 5/17/2024 4:28 AM, Chris Lew wrote:
> From: Richard Maina <[email protected]>
>
> When remoteproc goes down unexpectedly this results in a state where any
> acquired hwspinlocks will remain locked possibly resulting in deadlock.
> In order to ensure all locks are freed we include a call to
> hwspin_lock_bust() during remoteproc shutdown.
>
> For qcom_q6v5_pas remoteprocs, each remoteproc has an assigned id that
> is used to take the hwspinlock. Remoteproc should use this id to try and
> bust the lock on remoteproc stop.
>
> This edge case only occurs with q6v5_pas watchdog crashes. The error
> fatal case has handling to clear the hwspinlock before the error fatal
> interrupt is triggered.
>
> Signed-off-by: Richard Maina <[email protected]>
> Signed-off-by: Chris Lew <[email protected]>
> ---
> drivers/remoteproc/qcom_q6v5_pas.c | 28 ++++++++++++++++++++++++++++
> 1 file changed, 28 insertions(+)
>
> diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c
> index 54d8005d40a3..57178fcb9aa3 100644
> --- a/drivers/remoteproc/qcom_q6v5_pas.c
> +++ b/drivers/remoteproc/qcom_q6v5_pas.c
> @@ -10,6 +10,7 @@
> #include <linux/clk.h>
> #include <linux/delay.h>
> #include <linux/firmware.h>
> +#include <linux/hwspinlock.h>
> #include <linux/interrupt.h>
> #include <linux/kernel.h>
> #include <linux/module.h>
> @@ -52,6 +53,7 @@ struct adsp_data {
> const char *ssr_name;
> const char *sysmon_name;
> int ssctl_id;
> + int hwlock_id;
>
> int region_assign_idx;
> int region_assign_count;
> @@ -84,6 +86,9 @@ struct qcom_adsp {
> bool decrypt_shutdown;
> const char *info_name;
>
> + struct hwspinlock *hwlock;
> + int hwlock_id;
> +
> const struct firmware *firmware;
> const struct firmware *dtb_firmware;
>
> @@ -399,6 +404,12 @@ static int adsp_stop(struct rproc *rproc)
> if (handover)
> qcom_pas_handover(&adsp->q6v5);
>
> + if (adsp->hwlock) {
> + ret = hwspin_lock_bust(adsp->hwlock, adsp->hwlock_id);
> + if (ret)
> + dev_info(adsp->dev, "failed to bust hwspinlock\n");
> + }
> +
What if recovery is disabled? The fatal case clears the hwspinlock, but
the wdog case does not, right?
-Mukesh
On 5/17/2024 4:28 AM, Chris Lew wrote:
> From: Richard Maina <[email protected]>
>
> When remoteproc goes down unexpectedly this results in a state where any
> acquired hwspinlocks will remain locked possibly resulting in deadlock.
> In order to ensure all locks are freed we include a call to
> hwspin_lock_bust() during remoteproc shutdown.
>
> For qcom_q6v5_pas remoteprocs, each remoteproc has an assigned id that
> is used to take the hwspinlock. Remoteproc should use this id to try and
> bust the lock on remoteproc stop.
>
> This edge case only occurs with q6v5_pas watchdog crashes. The error
> fatal case has handling to clear the hwspinlock before the error fatal
> interrupt is triggered.
>
> Signed-off-by: Richard Maina <[email protected]>
> Signed-off-by: Chris Lew <[email protected]>
> ---
> drivers/remoteproc/qcom_q6v5_pas.c | 28 ++++++++++++++++++++++++++++
> 1 file changed, 28 insertions(+)
>
> diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c
> index 54d8005d40a3..57178fcb9aa3 100644
> --- a/drivers/remoteproc/qcom_q6v5_pas.c
> +++ b/drivers/remoteproc/qcom_q6v5_pas.c
> @@ -10,6 +10,7 @@
> #include <linux/clk.h>
> #include <linux/delay.h>
> #include <linux/firmware.h>
> +#include <linux/hwspinlock.h>
> #include <linux/interrupt.h>
> #include <linux/kernel.h>
> #include <linux/module.h>
> @@ -52,6 +53,7 @@ struct adsp_data {
> const char *ssr_name;
> const char *sysmon_name;
> int ssctl_id;
> + int hwlock_id;
>
> int region_assign_idx;
> int region_assign_count;
> @@ -84,6 +86,9 @@ struct qcom_adsp {
> bool decrypt_shutdown;
> const char *info_name;
>
> + struct hwspinlock *hwlock;
> + int hwlock_id;
> +
> const struct firmware *firmware;
> const struct firmware *dtb_firmware;
>
> @@ -399,6 +404,12 @@ static int adsp_stop(struct rproc *rproc)
> if (handover)
> qcom_pas_handover(&adsp->q6v5);
>
> + if (adsp->hwlock) {
> + ret = hwspin_lock_bust(adsp->hwlock, adsp->hwlock_id);
> + if (ret)
> + dev_info(adsp->dev, "failed to bust hwspinlock\n");
> + }
> +
As you said above, this is meant to cover only the wdog case, and fatal
cases are already handled at the remote;
yet you are clearing the lock here for both? Is this the right understanding?
-Mukesh
On 17/05/2024 00:58, Chris Lew wrote:
> From: Richard Maina <[email protected]>
>
> When remoteproc goes down unexpectedly this results in a state where any
> acquired hwspinlocks will remain locked possibly resulting in deadlock.
> In order to ensure all locks are freed we include a call to
> hwspin_lock_bust() during remoteproc shutdown.
>
> For qcom_q6v5_pas remoteprocs, each remoteproc has an assigned id that
> is used to take the hwspinlock. Remoteproc should use this id to try and
> bust the lock on remoteproc stop.
>
> This edge case only occurs with q6v5_pas watchdog crashes. The error
> fatal case has handling to clear the hwspinlock before the error fatal
> interrupt is triggered.
>
> Signed-off-by: Richard Maina <[email protected]>
> Signed-off-by: Chris Lew <[email protected]>
> ---
> + if (adsp->hwlock) {
> + ret = hwspin_lock_bust(adsp->hwlock, adsp->hwlock_id);
> + if (ret)
> + dev_info(adsp->dev, "failed to bust hwspinlock\n");
qcom_hwspinlock_bust() already prints an error on failure, you're
printing a second error here.
Choose at most one.
---
bod
On 5/17/2024 2:08 AM, Bryan O'Donoghue wrote:
> On 17/05/2024 00:58, Chris Lew wrote:
>> From: Richard Maina <[email protected]>
>>
>> When remoteproc goes down unexpectedly this results in a state where any
>> acquired hwspinlocks will remain locked possibly resulting in deadlock.
>> In order to ensure all locks are freed we include a call to
>> hwspin_lock_bust() during remoteproc shutdown.
>>
>> For qcom_q6v5_pas remoteprocs, each remoteproc has an assigned id that
>> is used to take the hwspinlock. Remoteproc should use this id to try and
>> bust the lock on remoteproc stop.
>>
>> This edge case only occurs with q6v5_pas watchdog crashes. The error
>> fatal case has handling to clear the hwspinlock before the error fatal
>> interrupt is triggered.
>>
>> Signed-off-by: Richard Maina <[email protected]>
>> Signed-off-by: Chris Lew <[email protected]>
>> ---
>
>> + if (adsp->hwlock) {
>> + ret = hwspin_lock_bust(adsp->hwlock, adsp->hwlock_id);
>> + if (ret)
>> + dev_info(adsp->dev, "failed to bust hwspinlock\n");
>
> qcom_hwspinlock_bust() already prints an error on failure, you're
> printing a second error here.
>
> Choose at most one.
>
Ack, I will remove the error print here and leave the one in
qcom_hwspinlock_bust().
> ---
> bod
On 5/17/2024 12:21 AM, Mukesh Ojha wrote:
>
>
> On 5/17/2024 4:28 AM, Chris Lew wrote:
>> From: Richard Maina <[email protected]>
>>
>> When remoteproc goes down unexpectedly this results in a state where any
>> acquired hwspinlocks will remain locked possibly resulting in deadlock.
>> In order to ensure all locks are freed we include a call to
>> hwspin_lock_bust() during remoteproc shutdown.
>>
>> For qcom_q6v5_pas remoteprocs, each remoteproc has an assigned id that
>> is used to take the hwspinlock. Remoteproc should use this id to try and
>> bust the lock on remoteproc stop.
>>
>> This edge case only occurs with q6v5_pas watchdog crashes. The error
>> fatal case has handling to clear the hwspinlock before the error fatal
>> interrupt is triggered.
>>
>> Signed-off-by: Richard Maina <[email protected]>
>> Signed-off-by: Chris Lew <[email protected]>
>> ---
>> drivers/remoteproc/qcom_q6v5_pas.c | 28 ++++++++++++++++++++++++++++
>> 1 file changed, 28 insertions(+)
>>
>> diff --git a/drivers/remoteproc/qcom_q6v5_pas.c
>> b/drivers/remoteproc/qcom_q6v5_pas.c
>> index 54d8005d40a3..57178fcb9aa3 100644
>> --- a/drivers/remoteproc/qcom_q6v5_pas.c
>> +++ b/drivers/remoteproc/qcom_q6v5_pas.c
>> @@ -10,6 +10,7 @@
>> #include <linux/clk.h>
>> #include <linux/delay.h>
>> #include <linux/firmware.h>
>> +#include <linux/hwspinlock.h>
>> #include <linux/interrupt.h>
>> #include <linux/kernel.h>
>> #include <linux/module.h>
>> @@ -52,6 +53,7 @@ struct adsp_data {
>> const char *ssr_name;
>> const char *sysmon_name;
>> int ssctl_id;
>> + int hwlock_id;
>> int region_assign_idx;
>> int region_assign_count;
>> @@ -84,6 +86,9 @@ struct qcom_adsp {
>> bool decrypt_shutdown;
>> const char *info_name;
>> + struct hwspinlock *hwlock;
>> + int hwlock_id;
>> +
>> const struct firmware *firmware;
>> const struct firmware *dtb_firmware;
>> @@ -399,6 +404,12 @@ static int adsp_stop(struct rproc *rproc)
>> if (handover)
>> qcom_pas_handover(&adsp->q6v5);
>> + if (adsp->hwlock) {
>> + ret = hwspin_lock_bust(adsp->hwlock, adsp->hwlock_id);
>> + if (ret)
>> + dev_info(adsp->dev, "failed to bust hwspinlock\n");
>> + }
>> +
>
> As you said above, you seem to cover only wdog case and fatal cases
> are already handled at remote;
>
> You are clearing here for both ? is this right understanding ?
>
Yes, that is correct. While the firmware is able to handle error fatal
cases, I think it is still the responsibility of the remoteproc driver
to try and bust the lock on behalf of the rproc in both cases.
The bust will only clear the spinlock if the lock currently holds the
token (the specific id for that rproc) that is passed into
hwspin_lock_bust(), so it is safe to attempt this even if the value has
already been cleared by the rproc in the error fatal case.
> -Mukesh
On 5/17/24 00:58, Chris Lew wrote:
> From: Richard Maina <[email protected]>
>
> When remoteproc goes down unexpectedly this results in a state where any
> acquired hwspinlocks will remain locked possibly resulting in deadlock.
> In order to ensure all locks are freed we include a call to
> hwspin_lock_bust() during remoteproc shutdown.
>
> For qcom_q6v5_pas remoteprocs, each remoteproc has an assigned id that
> is used to take the hwspinlock. Remoteproc should use this id to try and
> bust the lock on remoteproc stop.
>
> This edge case only occurs with q6v5_pas watchdog crashes. The error
> fatal case has handling to clear the hwspinlock before the error fatal
> interrupt is triggered.
>
> Signed-off-by: Richard Maina <[email protected]>
> Signed-off-by: Chris Lew <[email protected]>
> ---
> drivers/remoteproc/qcom_q6v5_pas.c | 28 ++++++++++++++++++++++++++++
> 1 file changed, 28 insertions(+)
>
> diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c
> index 54d8005d40a3..57178fcb9aa3 100644
> --- a/drivers/remoteproc/qcom_q6v5_pas.c
> +++ b/drivers/remoteproc/qcom_q6v5_pas.c
> @@ -10,6 +10,7 @@
> #include <linux/clk.h>
> #include <linux/delay.h>
> #include <linux/firmware.h>
> +#include <linux/hwspinlock.h>
> #include <linux/interrupt.h>
> #include <linux/kernel.h>
> #include <linux/module.h>
> @@ -52,6 +53,7 @@ struct adsp_data {
> const char *ssr_name;
> const char *sysmon_name;
> int ssctl_id;
> + int hwlock_id;
>
> int region_assign_idx;
> int region_assign_count;
> @@ -84,6 +86,9 @@ struct qcom_adsp {
> bool decrypt_shutdown;
> const char *info_name;
>
> + struct hwspinlock *hwlock;
> + int hwlock_id;
IIRC, this is the same one that is passed in the DT.
Can we get it dynamically from there?
Konrad
On 5/21/2024 10:38 AM, Konrad Dybcio wrote:
>
>
> On 5/17/24 00:58, Chris Lew wrote:
>> From: Richard Maina <[email protected]>
>>
>> When remoteproc goes down unexpectedly this results in a state where any
>> acquired hwspinlocks will remain locked possibly resulting in deadlock.
>> In order to ensure all locks are freed we include a call to
>> hwspin_lock_bust() during remoteproc shutdown.
>>
>> For qcom_q6v5_pas remoteprocs, each remoteproc has an assigned id that
>> is used to take the hwspinlock. Remoteproc should use this id to try and
>> bust the lock on remoteproc stop.
>>
>> This edge case only occurs with q6v5_pas watchdog crashes. The error
>> fatal case has handling to clear the hwspinlock before the error fatal
>> interrupt is triggered.
>>
>> Signed-off-by: Richard Maina <[email protected]>
>> Signed-off-by: Chris Lew <[email protected]>
>> ---
>
>
>> drivers/remoteproc/qcom_q6v5_pas.c | 28 ++++++++++++++++++++++++++++
>> 1 file changed, 28 insertions(+)
>>
>> diff --git a/drivers/remoteproc/qcom_q6v5_pas.c
>> b/drivers/remoteproc/qcom_q6v5_pas.c
>> index 54d8005d40a3..57178fcb9aa3 100644
>> --- a/drivers/remoteproc/qcom_q6v5_pas.c
>> +++ b/drivers/remoteproc/qcom_q6v5_pas.c
>> @@ -10,6 +10,7 @@
>> #include <linux/clk.h>
>> #include <linux/delay.h>
>> #include <linux/firmware.h>
>> +#include <linux/hwspinlock.h>
>> #include <linux/interrupt.h>
>> #include <linux/kernel.h>
>> #include <linux/module.h>
>> @@ -52,6 +53,7 @@ struct adsp_data {
>> const char *ssr_name;
>> const char *sysmon_name;
>> int ssctl_id;
>> + int hwlock_id;
>> int region_assign_idx;
>> int region_assign_count;
>> @@ -84,6 +86,9 @@ struct qcom_adsp {
>> bool decrypt_shutdown;
>> const char *info_name;
>> + struct hwspinlock *hwlock;
>> + int hwlock_id;
>
> IIRC, this is the same one that is passed in the DT.
>
> Can we get it dynamically from there?
>
The argument passed in DT is the index of the hwlock in the TCSR mutex
region. The index determines which of hwlock[0..n] is used.
This id, on the other hand, is the identifier that is passed into
hwspin_lock_bust(): the actual value that we would read from
hwlock[0..n] to see if we need to bust the lock.
Maybe the naming of this variable is confusing. Do you have any
suggestions to make it clearer? We could call it hwlock_bust_id.
We could also try increasing the #hwlock-cells to 2 and have something
like <&phandle index bust_id>. To me this seemed odd for clients that
weren't planning on using the bust_id.
> Konrad