2022-01-21 19:59:32

by Mukesh Ojha

[permalink] [raw]
Subject: [PATCH] remoteproc: Use unbounded/high priority workqueue for recovery work

There could be a scenario where there is too much load (n tasks
affined to it) on the core on which rproc recovery is queued.
As a result, it takes a number of seconds to complete
the recovery.

If we make this workqueue unbound and move it to the high-priority worker
pool, then this work has a better chance of being finished in less time.

Signed-off-by: Mukesh Ojha <[email protected]>
---
drivers/remoteproc/remoteproc_core.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 69f51ac..efb6316 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -59,6 +59,7 @@ static int rproc_release_carveout(struct rproc *rproc,

/* Unique indices for remoteproc devices */
static DEFINE_IDA(rproc_dev_index);
+static struct workqueue_struct *rproc_recovery_wq;

static const char * const rproc_crash_names[] = {
[RPROC_MMUFAULT] = "mmufault",
@@ -2752,8 +2753,10 @@ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
dev_err(&rproc->dev, "crash detected in %s: type %s\n",
rproc->name, rproc_crash_to_string(type));

- /* Have a worker handle the error; ensure system is not suspended */
- queue_work(system_freezable_wq, &rproc->crash_handler);
+ if (rproc_recovery_wq)
+ queue_work(rproc_recovery_wq, &rproc->crash_handler);
+ else
+ queue_work(system_freezable_wq, &rproc->crash_handler);
}
EXPORT_SYMBOL(rproc_report_crash);

@@ -2802,6 +2805,11 @@ static void __exit rproc_exit_panic(void)

static int __init remoteproc_init(void)
{
+ rproc_recovery_wq = alloc_workqueue("rproc_recovery_wq", WQ_UNBOUND |
+ WQ_HIGHPRI | WQ_FREEZABLE, 0);
+ if (!rproc_recovery_wq)
+ pr_err("remoteproc: creation of rproc_recovery_wq failed\n");
+
rproc_init_sysfs();
rproc_init_debugfs();
rproc_init_cdev();
@@ -2818,6 +2826,8 @@ static void __exit remoteproc_exit(void)
rproc_exit_panic();
rproc_exit_debugfs();
rproc_exit_sysfs();
+ if (rproc_recovery_wq)
+ destroy_workqueue(rproc_recovery_wq);
}
module_exit(remoteproc_exit);

--
2.7.4


2022-01-24 19:22:24

by Mukesh Ojha

[permalink] [raw]
Subject: Re: [PATCH] remoteproc: Use unbounded/high priority workqueue for recovery work

+linux-arm-msm

Thanks,
-Mukesh

On 1/20/2022 1:00 AM, Mukesh Ojha wrote:
> There could be a scenario where there is too much load(n number
> of tasks which is affined) on a core on which rproc recovery
> is queued. Due to which, it takes number of seconds to complete
> the recovery.
>
> If we make this queue unbounded and move it to high priority worker
> pool then this work can be attempted to finished in less time.
>
> Signed-off-by: Mukesh Ojha <[email protected]>
> ---
> drivers/remoteproc/remoteproc_core.c | 14 ++++++++++++--
> 1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> index 69f51ac..efb6316 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -59,6 +59,7 @@ static int rproc_release_carveout(struct rproc *rproc,
>
> /* Unique indices for remoteproc devices */
> static DEFINE_IDA(rproc_dev_index);
> +static struct workqueue_struct *rproc_recovery_wq;
>
> static const char * const rproc_crash_names[] = {
> [RPROC_MMUFAULT] = "mmufault",
> @@ -2752,8 +2753,10 @@ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
> dev_err(&rproc->dev, "crash detected in %s: type %s\n",
> rproc->name, rproc_crash_to_string(type));
>
> - /* Have a worker handle the error; ensure system is not suspended */
> - queue_work(system_freezable_wq, &rproc->crash_handler);
> + if (rproc_recovery_wq)
> + queue_work(rproc_recovery_wq, &rproc->crash_handler);
> + else
> + queue_work(system_freezable_wq, &rproc->crash_handler);
> }
> EXPORT_SYMBOL(rproc_report_crash);
>
> @@ -2802,6 +2805,11 @@ static void __exit rproc_exit_panic(void)
>
> static int __init remoteproc_init(void)
> {
> + rproc_recovery_wq = alloc_workqueue("rproc_recovery_wq", WQ_UNBOUND |
> + WQ_HIGHPRI | WQ_FREEZABLE, 0);
> + if (!rproc_recovery_wq)
> + pr_err("remoteproc: creation of rproc_recovery_wq failed\n");
> +
> rproc_init_sysfs();
> rproc_init_debugfs();
> rproc_init_cdev();
> @@ -2818,6 +2826,8 @@ static void __exit remoteproc_exit(void)
> rproc_exit_panic();
> rproc_exit_debugfs();
> rproc_exit_sysfs();
> + if (rproc_recovery_wq)
> + destroy_workqueue(rproc_recovery_wq);
> }
> module_exit(remoteproc_exit);
>

2022-03-11 23:33:46

by Bjorn Andersson

[permalink] [raw]
Subject: Re: [PATCH] remoteproc: Use unbounded/high priority workqueue for recovery work

On Wed 19 Jan 13:30 CST 2022, Mukesh Ojha wrote:

> There could be a scenario where there is too much load(n number
> of tasks which is affined) on a core on which rproc recovery
> is queued. Due to which, it takes number of seconds to complete
> the recovery.
>
> If we make this queue unbounded and move it to high priority worker
> pool then this work can be attempted to finished in less time.

I unfortunately find this reasoning for adding WQ_HIGHPRI rather
speculative. Please describe a concrete case that warrants the new
work queue to be high priority.

What is "number of seconds", what is "less time" and why is it more
important to recover some remote processor than whatever else the system
is busy doing?

Thanks,
Bjorn

>
> Signed-off-by: Mukesh Ojha <[email protected]>
> ---
> drivers/remoteproc/remoteproc_core.c | 14 ++++++++++++--
> 1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> index 69f51ac..efb6316 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -59,6 +59,7 @@ static int rproc_release_carveout(struct rproc *rproc,
>
> /* Unique indices for remoteproc devices */
> static DEFINE_IDA(rproc_dev_index);
> +static struct workqueue_struct *rproc_recovery_wq;
>
> static const char * const rproc_crash_names[] = {
> [RPROC_MMUFAULT] = "mmufault",
> @@ -2752,8 +2753,10 @@ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
> dev_err(&rproc->dev, "crash detected in %s: type %s\n",
> rproc->name, rproc_crash_to_string(type));
>
> - /* Have a worker handle the error; ensure system is not suspended */
> - queue_work(system_freezable_wq, &rproc->crash_handler);
> + if (rproc_recovery_wq)
> + queue_work(rproc_recovery_wq, &rproc->crash_handler);
> + else
> + queue_work(system_freezable_wq, &rproc->crash_handler);
> }
> EXPORT_SYMBOL(rproc_report_crash);
>
> @@ -2802,6 +2805,11 @@ static void __exit rproc_exit_panic(void)
>
> static int __init remoteproc_init(void)
> {
> + rproc_recovery_wq = alloc_workqueue("rproc_recovery_wq", WQ_UNBOUND |
> + WQ_HIGHPRI | WQ_FREEZABLE, 0);
> + if (!rproc_recovery_wq)
> + pr_err("remoteproc: creation of rproc_recovery_wq failed\n");
> +
> rproc_init_sysfs();
> rproc_init_debugfs();
> rproc_init_cdev();
> @@ -2818,6 +2826,8 @@ static void __exit remoteproc_exit(void)
> rproc_exit_panic();
> rproc_exit_debugfs();
> rproc_exit_sysfs();
> + if (rproc_recovery_wq)
> + destroy_workqueue(rproc_recovery_wq);
> }
> module_exit(remoteproc_exit);
>
> --
> 2.7.4
>

2022-03-29 10:38:51

by Mukesh Ojha

[permalink] [raw]
Subject: Re: [PATCH] remoteproc: Use unbounded/high priority workqueue for recovery work


On 3/12/2022 2:31 AM, Bjorn Andersson wrote:
> On Wed 19 Jan 13:30 CST 2022, Mukesh Ojha wrote:
>
>> There could be a scenario where there is too much load(n number
>> of tasks which is affined) on a core on which rproc recovery
>> is queued. Due to which, it takes number of seconds to complete
>> the recovery.
>>
>> If we make this queue unbounded and move it to high priority worker
>> pool then this work can be attempted to finished in less time.
> I unfortunately find this reasoning for adding WQ_HIGHPRI rather
> speculative. Please describe a concrete case that warrants the new
> work queue to be high priority.
>
> What is "number of seconds", what is "less time" and why is it more
> important to recover some remote processor than whatever else the system
> is busy doing?

Meanwhile, I will check whether making it unbound alone helps in our
low-latency use cases.
So, does it make sense to make it unbound | freezable?

-Mukesh

> Thanks,
> Bjorn
>
>> Signed-off-by: Mukesh Ojha <[email protected]>
>> ---
>> drivers/remoteproc/remoteproc_core.c | 14 ++++++++++++--
>> 1 file changed, 12 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
>> index 69f51ac..efb6316 100644
>> --- a/drivers/remoteproc/remoteproc_core.c
>> +++ b/drivers/remoteproc/remoteproc_core.c
>> @@ -59,6 +59,7 @@ static int rproc_release_carveout(struct rproc *rproc,
>>
>> /* Unique indices for remoteproc devices */
>> static DEFINE_IDA(rproc_dev_index);
>> +static struct workqueue_struct *rproc_recovery_wq;
>>
>> static const char * const rproc_crash_names[] = {
>> [RPROC_MMUFAULT] = "mmufault",
>> @@ -2752,8 +2753,10 @@ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
>> dev_err(&rproc->dev, "crash detected in %s: type %s\n",
>> rproc->name, rproc_crash_to_string(type));
>>
>> - /* Have a worker handle the error; ensure system is not suspended */
>> - queue_work(system_freezable_wq, &rproc->crash_handler);
>> + if (rproc_recovery_wq)
>> + queue_work(rproc_recovery_wq, &rproc->crash_handler);
>> + else
>> + queue_work(system_freezable_wq, &rproc->crash_handler);
>> }
>> EXPORT_SYMBOL(rproc_report_crash);
>>
>> @@ -2802,6 +2805,11 @@ static void __exit rproc_exit_panic(void)
>>
>> static int __init remoteproc_init(void)
>> {
>> + rproc_recovery_wq = alloc_workqueue("rproc_recovery_wq", WQ_UNBOUND |
>> + WQ_HIGHPRI | WQ_FREEZABLE, 0);
>> + if (!rproc_recovery_wq)
>> + pr_err("remoteproc: creation of rproc_recovery_wq failed\n");
>> +
>> rproc_init_sysfs();
>> rproc_init_debugfs();
>> rproc_init_cdev();
>> @@ -2818,6 +2826,8 @@ static void __exit remoteproc_exit(void)
>> rproc_exit_panic();
>> rproc_exit_debugfs();
>> rproc_exit_sysfs();
>> + if (rproc_recovery_wq)
>> + destroy_workqueue(rproc_recovery_wq);
>> }
>> module_exit(remoteproc_exit);
>>
>> --
>> 2.7.4
>>