If a cmdq client is used from multiple threads, a race will occur without
mutex protection. As a result, message C stays queued in the mailbox's
queue until message D is sent and triggers its submission.
Thread A              Thread B              Thread C              Thread D...
-----------------------------------------------------------------------------------
mbox_send_message()
    send_data()
                      mbox_send_message()
                          *exit
                                            mbox_send_message()
                                                *exit
mbox_client_txdone()
    tx_tick()
                      mbox_client_txdone()
                          tx_tick()
                                            mbox_client_txdone()
                                                tx_tick()
msg_submit()
    send_data()
                      msg_submit()
                          *exit
                                            msg_submit()
                                                *exit
-----------------------------------------------------------------------------------
Signed-off-by: Bibby Hsieh <[email protected]>
---
drivers/soc/mediatek/mtk-cmdq-helper.c | 3 +++
include/linux/soc/mediatek/mtk-cmdq.h | 1 +
2 files changed, 4 insertions(+)
diff --git a/drivers/soc/mediatek/mtk-cmdq-helper.c b/drivers/soc/mediatek/mtk-cmdq-helper.c
index 9add0fd5fa6c..9e35e0beffaa 100644
--- a/drivers/soc/mediatek/mtk-cmdq-helper.c
+++ b/drivers/soc/mediatek/mtk-cmdq-helper.c
@@ -81,6 +81,7 @@ struct cmdq_client *cmdq_mbox_create(struct device *dev, int index, u32 timeout)
client->client.dev = dev;
client->client.tx_block = false;
client->chan = mbox_request_channel(&client->client, index);
+ mutex_init(&client->mutex);
if (IS_ERR(client->chan)) {
long err;
@@ -352,9 +353,11 @@ int cmdq_pkt_flush_async(struct cmdq_pkt *pkt, cmdq_async_flush_cb cb,
spin_unlock_irqrestore(&client->lock, flags);
}
+ mutex_lock(&client->mutex);
mbox_send_message(client->chan, pkt);
/* We can send next packet immediately, so just call txdone. */
mbox_client_txdone(client->chan, 0);
+ mutex_unlock(&client->mutex);
return 0;
}
diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h
index a74c1d5acdf3..0f9071cd1bc7 100644
--- a/include/linux/soc/mediatek/mtk-cmdq.h
+++ b/include/linux/soc/mediatek/mtk-cmdq.h
@@ -28,6 +28,7 @@ struct cmdq_client {
struct mbox_chan *chan;
struct timer_list timer;
u32 timeout_ms; /* in unit of microsecond */
+ struct mutex mutex;
};
/**
--
2.18.0
Hi, Jassi:
Are mbox_send_message() and mbox_client_txdone() thread-safe? If these
two are supposed to be thread-safe, this bug should be fixed in the
mailbox core, not in the client.
Regards,
CK
On Thu, 2019-11-21 at 15:29 +0800, Bibby Hsieh wrote:
> If cmdq client is multi thread user, racing will occur without mutex
> protection. It will make the C message queued in mailbox's queue
> always need D message's triggering.
>
> Thread A Thread B Thread C Thread D...
> -----------------------------------------------------------------------------------
> mbox_send_message()
> send_data()
> mbox_send_message()
> *exit
> mbox_send_message()
> *exit
> mbox_client_txdone()
> tx_tick()
> mbox_client_txdone()
> tx_tick()
> mbox_client_txdone()
> tx_tick()
> msg_submit()
> send_data()
> msg_submit()
> *exit
> msg_submit()
> *exit
> -----------------------------------------------------------------------------------
>
> Signed-off-by: Bibby Hsieh <[email protected]>
> ---
> drivers/soc/mediatek/mtk-cmdq-helper.c | 3 +++
> include/linux/soc/mediatek/mtk-cmdq.h | 1 +
> 2 files changed, 4 insertions(+)
>
> diff --git a/drivers/soc/mediatek/mtk-cmdq-helper.c b/drivers/soc/mediatek/mtk-cmdq-helper.c
> index 9add0fd5fa6c..9e35e0beffaa 100644
> --- a/drivers/soc/mediatek/mtk-cmdq-helper.c
> +++ b/drivers/soc/mediatek/mtk-cmdq-helper.c
> @@ -81,6 +81,7 @@ struct cmdq_client *cmdq_mbox_create(struct device *dev, int index, u32 timeout)
> client->client.dev = dev;
> client->client.tx_block = false;
> client->chan = mbox_request_channel(&client->client, index);
> + mutex_init(&client->mutex);
>
> if (IS_ERR(client->chan)) {
> long err;
> @@ -352,9 +353,11 @@ int cmdq_pkt_flush_async(struct cmdq_pkt *pkt, cmdq_async_flush_cb cb,
> spin_unlock_irqrestore(&client->lock, flags);
> }
>
> + mutex_lock(&client->mutex);
> mbox_send_message(client->chan, pkt);
> /* We can send next packet immediately, so just call txdone. */
> mbox_client_txdone(client->chan, 0);
> + mutex_unlock(&client->mutex);
>
> return 0;
> }
> diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h
> index a74c1d5acdf3..0f9071cd1bc7 100644
> --- a/include/linux/soc/mediatek/mtk-cmdq.h
> +++ b/include/linux/soc/mediatek/mtk-cmdq.h
> @@ -28,6 +28,7 @@ struct cmdq_client {
> struct mbox_chan *chan;
> struct timer_list timer;
> u32 timeout_ms; /* in unit of microsecond */
> + struct mutex mutex;
> };
>
> /**
Hi, Jassi:
Ping again.
Are mbox_send_message() and mbox_client_txdone() thread-safe? If these
two are supposed to be thread-safe, this bug should be fixed in the
mailbox core, not in the client.
Regards,
CK
On Wed, 2019-12-04 at 10:22 +0800, CK Hu wrote:
> Hi, Jassi:
>
> Are mbox_send_message() and mbox_client_txdone() thread-safe? If these
> two are thread-safe, this bug should be fixed in mailbox core not
> client.
>
> Regards,
> CK
>
> On Thu, 2019-11-21 at 15:29 +0800, Bibby Hsieh wrote:
> > If cmdq client is multi thread user, racing will occur without mutex
> > protection. It will make the C message queued in mailbox's queue
> > always need D message's triggering.
> >
> > Thread A Thread B Thread C Thread D...
> > -----------------------------------------------------------------------------------
> > mbox_send_message()
> > send_data()
> > mbox_send_message()
> > *exit
> > mbox_send_message()
> > *exit
> > mbox_client_txdone()
> > tx_tick()
> > mbox_client_txdone()
> > tx_tick()
> > mbox_client_txdone()
> > tx_tick()
> > msg_submit()
> > send_data()
> > msg_submit()
> > *exit
> > msg_submit()
> > *exit
> > -----------------------------------------------------------------------------------
> >
> > Signed-off-by: Bibby Hsieh <[email protected]>
> > ---
> > drivers/soc/mediatek/mtk-cmdq-helper.c | 3 +++
> > include/linux/soc/mediatek/mtk-cmdq.h | 1 +
> > 2 files changed, 4 insertions(+)
> >
> > diff --git a/drivers/soc/mediatek/mtk-cmdq-helper.c b/drivers/soc/mediatek/mtk-cmdq-helper.c
> > index 9add0fd5fa6c..9e35e0beffaa 100644
> > --- a/drivers/soc/mediatek/mtk-cmdq-helper.c
> > +++ b/drivers/soc/mediatek/mtk-cmdq-helper.c
> > @@ -81,6 +81,7 @@ struct cmdq_client *cmdq_mbox_create(struct device *dev, int index, u32 timeout)
> > client->client.dev = dev;
> > client->client.tx_block = false;
> > client->chan = mbox_request_channel(&client->client, index);
> > + mutex_init(&client->mutex);
> >
> > if (IS_ERR(client->chan)) {
> > long err;
> > @@ -352,9 +353,11 @@ int cmdq_pkt_flush_async(struct cmdq_pkt *pkt, cmdq_async_flush_cb cb,
> > spin_unlock_irqrestore(&client->lock, flags);
> > }
> >
> > + mutex_lock(&client->mutex);
> > mbox_send_message(client->chan, pkt);
> > /* We can send next packet immediately, so just call txdone. */
> > mbox_client_txdone(client->chan, 0);
> > + mutex_unlock(&client->mutex);
> >
> > return 0;
> > }
> > diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h
> > index a74c1d5acdf3..0f9071cd1bc7 100644
> > --- a/include/linux/soc/mediatek/mtk-cmdq.h
> > +++ b/include/linux/soc/mediatek/mtk-cmdq.h
> > @@ -28,6 +28,7 @@ struct cmdq_client {
> > struct mbox_chan *chan;
> > struct timer_list timer;
> > u32 timeout_ms; /* in unit of microsecond */
> > + struct mutex mutex;
> > };
> >
> > /**
>
On Tue, Dec 3, 2019 at 8:22 PM CK Hu <[email protected]> wrote:
>
> Hi, Jassi:
>
> Are mbox_send_message() and mbox_client_txdone() thread-safe? If these
> two are thread-safe, this bug should be fixed in mailbox core not
> client.
>
mbox_client_txdone() should be called only when the client _knows_ the
message has been sent.
There is a difference between knowing when tx is done and assuming
tx-done because there is no way of knowing it.
Your issue arises because you call mbox_client_txdone() immediately
after mbox_send_message(). That may be the only way to do it, but it
doesn't mean you are exempt from taking other precautions (like a
mutex). So I think your patch is reasonable.
Cheers!
> Regards,
> CK
>
> On Thu, 2019-11-21 at 15:29 +0800, Bibby Hsieh wrote:
> > If cmdq client is multi thread user, racing will occur without mutex
> > protection. It will make the C message queued in mailbox's queue
> > always need D message's triggering.
> >
> > Thread A Thread B Thread C Thread D...
> > -----------------------------------------------------------------------------------
> > mbox_send_message()
> > send_data()
> > mbox_send_message()
> > *exit
> > mbox_send_message()
> > *exit
> > mbox_client_txdone()
> > tx_tick()
> > mbox_client_txdone()
> > tx_tick()
> > mbox_client_txdone()
> > tx_tick()
> > msg_submit()
> > send_data()
> > msg_submit()
> > *exit
> > msg_submit()
> > *exit
> > -----------------------------------------------------------------------------------
> >
> > Signed-off-by: Bibby Hsieh <[email protected]>
> > ---
> > drivers/soc/mediatek/mtk-cmdq-helper.c | 3 +++
> > include/linux/soc/mediatek/mtk-cmdq.h | 1 +
> > 2 files changed, 4 insertions(+)
> >
> > diff --git a/drivers/soc/mediatek/mtk-cmdq-helper.c b/drivers/soc/mediatek/mtk-cmdq-helper.c
> > index 9add0fd5fa6c..9e35e0beffaa 100644
> > --- a/drivers/soc/mediatek/mtk-cmdq-helper.c
> > +++ b/drivers/soc/mediatek/mtk-cmdq-helper.c
> > @@ -81,6 +81,7 @@ struct cmdq_client *cmdq_mbox_create(struct device *dev, int index, u32 timeout)
> > client->client.dev = dev;
> > client->client.tx_block = false;
> > client->chan = mbox_request_channel(&client->client, index);
> > + mutex_init(&client->mutex);
> >
> > if (IS_ERR(client->chan)) {
> > long err;
> > @@ -352,9 +353,11 @@ int cmdq_pkt_flush_async(struct cmdq_pkt *pkt, cmdq_async_flush_cb cb,
> > spin_unlock_irqrestore(&client->lock, flags);
> > }
> >
> > + mutex_lock(&client->mutex);
> > mbox_send_message(client->chan, pkt);
> > /* We can send next packet immediately, so just call txdone. */
> > mbox_client_txdone(client->chan, 0);
> > + mutex_unlock(&client->mutex);
> >
> > return 0;
> > }
> > diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h
> > index a74c1d5acdf3..0f9071cd1bc7 100644
> > --- a/include/linux/soc/mediatek/mtk-cmdq.h
> > +++ b/include/linux/soc/mediatek/mtk-cmdq.h
> > @@ -28,6 +28,7 @@ struct cmdq_client {
> > struct mbox_chan *chan;
> > struct timer_list timer;
> > u32 timeout_ms; /* in unit of microsecond */
> > + struct mutex mutex;
> > };
> >
> > /**
>
Hi, Bibby:
On Thu, 2019-11-21 at 15:29 +0800, Bibby Hsieh wrote:
> If cmdq client is multi thread user, racing will occur without mutex
> protection. It will make the C message queued in mailbox's queue
> always need D message's triggering.
>
> Thread A Thread B Thread C Thread D...
> -----------------------------------------------------------------------------------
> mbox_send_message()
> send_data()
> mbox_send_message()
> *exit
> mbox_send_message()
> *exit
> mbox_client_txdone()
> tx_tick()
> mbox_client_txdone()
> tx_tick()
> mbox_client_txdone()
> tx_tick()
> msg_submit()
> send_data()
> msg_submit()
> *exit
> msg_submit()
> *exit
> -----------------------------------------------------------------------------------
>
> Signed-off-by: Bibby Hsieh <[email protected]>
> ---
> drivers/soc/mediatek/mtk-cmdq-helper.c | 3 +++
> include/linux/soc/mediatek/mtk-cmdq.h | 1 +
> 2 files changed, 4 insertions(+)
>
> diff --git a/drivers/soc/mediatek/mtk-cmdq-helper.c b/drivers/soc/mediatek/mtk-cmdq-helper.c
> index 9add0fd5fa6c..9e35e0beffaa 100644
> --- a/drivers/soc/mediatek/mtk-cmdq-helper.c
> +++ b/drivers/soc/mediatek/mtk-cmdq-helper.c
> @@ -81,6 +81,7 @@ struct cmdq_client *cmdq_mbox_create(struct device *dev, int index, u32 timeout)
> client->client.dev = dev;
> client->client.tx_block = false;
> client->chan = mbox_request_channel(&client->client, index);
> + mutex_init(&client->mutex);
>
> if (IS_ERR(client->chan)) {
> long err;
> @@ -352,9 +353,11 @@ int cmdq_pkt_flush_async(struct cmdq_pkt *pkt, cmdq_async_flush_cb cb,
> spin_unlock_irqrestore(&client->lock, flags);
> }
>
> + mutex_lock(&client->mutex);
> mbox_send_message(client->chan, pkt);
> /* We can send next packet immediately, so just call txdone. */
> mbox_client_txdone(client->chan, 0);
> + mutex_unlock(&client->mutex);
In [1], Mediatek DRM is the first client to use cmdq, and it already has
its own mutex to protect this. I think a helper should be something that
helps many users, but so far I see only Mediatek MDP [2] needing this. For
DRM, there is a lot of unnecessary code in cmdq_pkt_flush_async(): DRM does
not need the timer to check for timeouts, and DRM could call
dma_sync_single_for_cpu() in its own callback, so it does not need an
intermediate callback created just to do this. I would agree to this patch
only when I see two or more users that need it.
[1]
https://github.com/ckhu-mediatek/linux.git-tags/commit/4df12ed1866d1104f631e06218bd15fde512a79e
[2] https://patchwork.kernel.org/patch/10945609/
Regards,
CK
>
> return 0;
> }
> diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h
> index a74c1d5acdf3..0f9071cd1bc7 100644
> --- a/include/linux/soc/mediatek/mtk-cmdq.h
> +++ b/include/linux/soc/mediatek/mtk-cmdq.h
> @@ -28,6 +28,7 @@ struct cmdq_client {
> struct mbox_chan *chan;
> struct timer_list timer;
> u32 timeout_ms; /* in unit of microsecond */
> + struct mutex mutex;
> };
>
> /**