2023-06-07 12:34:12

by Manikanta Mylavarapu

[permalink] [raw]
Subject: [PATCH 0/2] change glink work queue type

Since rx & intent worker threads are bound
to kernel global workqueue, it takes more
time to execute. If these worker threads
are not executed within stipulated duration
QDSP6 will OOM and glink client drivers get
timeout error.

This series will change work queue type to
UNBOUND work queue to ensure worker thread
execute as early as possible.

It's verified on IPQ807x.

Manikanta Mylavarapu (2):
rpmsg: glink: change rx work queue type
rpmsg: glink: change intent work queue type

drivers/rpmsg/qcom_glink_native.c | 23 +++++++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)


base-commit: abbd8bb42915d9ed06df11b430bf4ecb3d8ac5ad
--
2.17.1



2023-06-07 12:34:20

by Manikanta Mylavarapu

[permalink] [raw]
Subject: [PATCH 2/2] rpmsg: glink: change intent work queue type

QDSP6 will clear heap memory once it's received
RX_DONE event from APPS. Under heavy cpu load
intent worker thread not able to get cpu slot
because it's bound to kernel global work queue.
Due to this QDSP6 firmware faces OOM and it leads
to Q6 crash. Changing intent work queue type to
UNBOUND workqueue ensures intent worker thread
will be executed as early as possible.

Signed-off-by: Manikanta Mylavarapu <[email protected]>
---
drivers/rpmsg/qcom_glink_native.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c
index 6f9a439e5046..c3e076bb863f 100644
--- a/drivers/rpmsg/qcom_glink_native.c
+++ b/drivers/rpmsg/qcom_glink_native.c
@@ -140,6 +140,7 @@ enum {
* @liids: idr of all local intents
* @riids: idr of all remote intents
* @intent_work: worker responsible for transmitting rx_done packets
+ * @intent_wq: work queue of intent_work
* @done_intents: list of intents that needs to be announced rx_done
* @buf: receive buffer, for gathering fragments
* @buf_offset: write offset in @buf
@@ -169,6 +170,7 @@ struct glink_channel {
struct idr liids;
struct idr riids;
struct work_struct intent_work;
+ struct workqueue_struct *intent_wq;
struct list_head done_intents;

struct glink_core_rx_intent *buf;
@@ -231,6 +233,14 @@ static struct glink_channel *qcom_glink_alloc_channel(struct qcom_glink *glink,
INIT_LIST_HEAD(&channel->done_intents);
INIT_WORK(&channel->intent_work, qcom_glink_rx_done_work);

+ channel->intent_wq = alloc_workqueue("intent_wq", WQ_UNBOUND, 1);
+ if (!channel->intent_wq) {
+ pr_err("failed to create %s channel intent work queue\n",
+ channel->name);
+ kfree(channel);
+ return ERR_PTR(-ENOMEM);
+ }
+
idr_init(&channel->liids);
idr_init(&channel->riids);
kref_init(&channel->refcount);
@@ -270,6 +280,7 @@ static void qcom_glink_channel_release(struct kref *ref)
idr_destroy(&channel->riids);
spin_unlock_irqrestore(&channel->intent_lock, flags);

+ destroy_workqueue(channel->intent_wq);
kfree(channel->name);
kfree(channel);
}
@@ -573,7 +584,7 @@ static void qcom_glink_rx_done(struct qcom_glink *glink,
list_add_tail(&intent->node, &channel->done_intents);
spin_unlock(&channel->intent_lock);

- schedule_work(&channel->intent_work);
+ queue_work(channel->intent_wq, &channel->intent_work);
}

/**
--
2.17.1


2023-07-11 12:54:35

by Manikanta Mylavarapu

[permalink] [raw]
Subject: Re: [PATCH 2/2] rpmsg: glink: change intent work queue type



On 6/7/2023 5:47 PM, Manikanta Mylavarapu wrote:
> QDSP6 will clear heap memory once it's received
> RX_DONE event from APPS. Under heavy cpu load
> intent worker thread not able to get cpu slot
> because it's bound to kernel global work queue.
> Due to this QDSP6 firmware faces OOM and it leads
> to Q6 crash. Changing intent work queue type to
> UNBOUND workqueue ensures intent worker thread
> will be executed as early as possible.
>
> Signed-off-by: Manikanta Mylavarapu <[email protected]>
> ---
> drivers/rpmsg/qcom_glink_native.c | 13 ++++++++++++-
> 1 file changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c
> index 6f9a439e5046..c3e076bb863f 100644
> --- a/drivers/rpmsg/qcom_glink_native.c
> +++ b/drivers/rpmsg/qcom_glink_native.c
> @@ -140,6 +140,7 @@ enum {
> * @liids: idr of all local intents
> * @riids: idr of all remote intents
> * @intent_work: worker responsible for transmitting rx_done packets
> + * @intent_wq: work queue of intent_work
> * @done_intents: list of intents that needs to be announced rx_done
> * @buf: receive buffer, for gathering fragments
> * @buf_offset: write offset in @buf
> @@ -169,6 +170,7 @@ struct glink_channel {
> struct idr liids;
> struct idr riids;
> struct work_struct intent_work;
> + struct workqueue_struct *intent_wq;
> struct list_head done_intents;
>
> struct glink_core_rx_intent *buf;
> @@ -231,6 +233,14 @@ static struct glink_channel *qcom_glink_alloc_channel(struct qcom_glink *glink,
> INIT_LIST_HEAD(&channel->done_intents);
> INIT_WORK(&channel->intent_work, qcom_glink_rx_done_work);
>
> + channel->intent_wq = alloc_workqueue("intent_wq", WQ_UNBOUND, 1);
> + if (!channel->intent_wq) {
> + pr_err("failed to create %s channel intent work queue\n",
> + channel->name);
> + kfree(channel);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> idr_init(&channel->liids);
> idr_init(&channel->riids);
> kref_init(&channel->refcount);
> @@ -270,6 +280,7 @@ static void qcom_glink_channel_release(struct kref *ref)
> idr_destroy(&channel->riids);
> spin_unlock_irqrestore(&channel->intent_lock, flags);
>
> + destroy_workqueue(channel->intent_wq);
> kfree(channel->name);
> kfree(channel);
> }
> @@ -573,7 +584,7 @@ static void qcom_glink_rx_done(struct qcom_glink *glink,
> list_add_tail(&intent->node, &channel->done_intents);
> spin_unlock(&channel->intent_lock);
>
> - schedule_work(&channel->intent_work);
> + queue_work(channel->intent_wq, &channel->intent_work);
> }
>
> /**

Gentle reminder for review!

Thanks,
Manikanta.

2023-07-15 22:11:13

by Bjorn Andersson

[permalink] [raw]
Subject: Re: [PATCH 2/2] rpmsg: glink: change intent work queue type

On Wed, Jun 07, 2023 at 05:47:31PM +0530, Manikanta Mylavarapu wrote:
> QDSP6 will clear heap memory once it's received
> RX_DONE event from APPS. Under heavy cpu load
> intent worker thread not able to get cpu slot
> because it's bound to kernel global work queue.
> Due to this QDSP6 firmware faces OOM and it leads
> to Q6 crash. Changing intent work queue type to
> UNBOUND workqueue ensures intent worker thread
> will be executed as early as possible.
>

This commit message is specific and the motivation is clear, thank you!


I was hoping to move the rx_done generation directly to
qcom_glink_rx_data(), but I got side tracked on my verification of my
patches for this.

Please update the alloc_workqueue() and we can merge this first, if I
haven't gotten favourable test results until then.


PS. 75 character commit messages please.

Regards,
Bjorn

> Signed-off-by: Manikanta Mylavarapu <[email protected]>
> ---
> drivers/rpmsg/qcom_glink_native.c | 13 ++++++++++++-
> 1 file changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c
> index 6f9a439e5046..c3e076bb863f 100644
> --- a/drivers/rpmsg/qcom_glink_native.c
> +++ b/drivers/rpmsg/qcom_glink_native.c
> @@ -140,6 +140,7 @@ enum {
> * @liids: idr of all local intents
> * @riids: idr of all remote intents
> * @intent_work: worker responsible for transmitting rx_done packets
> + * @intent_wq: work queue of intent_work
> * @done_intents: list of intents that needs to be announced rx_done
> * @buf: receive buffer, for gathering fragments
> * @buf_offset: write offset in @buf
> @@ -169,6 +170,7 @@ struct glink_channel {
> struct idr liids;
> struct idr riids;
> struct work_struct intent_work;
> + struct workqueue_struct *intent_wq;
> struct list_head done_intents;
>
> struct glink_core_rx_intent *buf;
> @@ -231,6 +233,14 @@ static struct glink_channel *qcom_glink_alloc_channel(struct qcom_glink *glink,
> INIT_LIST_HEAD(&channel->done_intents);
> INIT_WORK(&channel->intent_work, qcom_glink_rx_done_work);
>
> + channel->intent_wq = alloc_workqueue("intent_wq", WQ_UNBOUND, 1);
> + if (!channel->intent_wq) {
> + pr_err("failed to create %s channel intent work queue\n",
> + channel->name);
> + kfree(channel);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> idr_init(&channel->liids);
> idr_init(&channel->riids);
> kref_init(&channel->refcount);
> @@ -270,6 +280,7 @@ static void qcom_glink_channel_release(struct kref *ref)
> idr_destroy(&channel->riids);
> spin_unlock_irqrestore(&channel->intent_lock, flags);
>
> + destroy_workqueue(channel->intent_wq);
> kfree(channel->name);
> kfree(channel);
> }
> @@ -573,7 +584,7 @@ static void qcom_glink_rx_done(struct qcom_glink *glink,
> list_add_tail(&intent->node, &channel->done_intents);
> spin_unlock(&channel->intent_lock);
>
> - schedule_work(&channel->intent_work);
> + queue_work(channel->intent_wq, &channel->intent_work);
> }
>
> /**
> --
> 2.17.1
>