From: "Raju P.L.S.S.S.N" <[email protected]>
The tcs->lock was introduced to serialize access within a TCS group. But
drv->lock is still needed to synchronize core aspects of the
communication. This puts the drv->lock in the critical and high latency
path of sending a request. drv->lock provides all the necessary
synchronization. So remove locking around the TCS group and simply use
drv->lock instead.
Signed-off-by: Raju P.L.S.S.S.N <[email protected]>
[ilina: split patch into multiple files, update commit text]
Signed-off-by: Lina Iyer <[email protected]>
---
Changes in v2:
- Split the patches into multiple
- Optimization to remove redundant TCS access
- Split the rpmh library changes into its own patch
- Remove locks in IRQ handler
- Update commit text
- Remove fixes in commit text
---
drivers/soc/qcom/rpmh-internal.h | 2 --
drivers/soc/qcom/rpmh-rsc.c | 32 ++++++++++++--------------------
2 files changed, 12 insertions(+), 22 deletions(-)
diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
index a7bbbb67991c..969d5030860e 100644
--- a/drivers/soc/qcom/rpmh-internal.h
+++ b/drivers/soc/qcom/rpmh-internal.h
@@ -28,7 +28,6 @@ struct rsc_drv;
* @offset: start of the TCS group relative to the TCSes in the RSC
* @num_tcs: number of TCSes in this type
* @ncpt: number of commands in each TCS
- * @lock: lock for synchronizing this TCS writes
* @req: requests that are sent from the TCS
* @cmd_cache: flattened cache of cmds in sleep/wake TCS
* @slots: indicates which of @cmd_addr are occupied
@@ -40,7 +39,6 @@ struct tcs_group {
u32 offset;
int num_tcs;
int ncpt;
- spinlock_t lock;
const struct tcs_request *req[MAX_TCS_PER_TYPE];
u32 *cmd_cache;
DECLARE_BITMAP(slots, MAX_TCS_SLOTS);
diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
index e278fc11fe5c..5ede8d6de3ad 100644
--- a/drivers/soc/qcom/rpmh-rsc.c
+++ b/drivers/soc/qcom/rpmh-rsc.c
@@ -106,26 +106,26 @@ static int tcs_invalidate(struct rsc_drv *drv, int type)
{
int m;
struct tcs_group *tcs;
+ int ret = 0;
tcs = get_tcs_of_type(drv, type);
- spin_lock(&tcs->lock);
- if (bitmap_empty(tcs->slots, MAX_TCS_SLOTS)) {
- spin_unlock(&tcs->lock);
- return 0;
- }
+ spin_lock(&drv->lock);
+ if (bitmap_empty(tcs->slots, MAX_TCS_SLOTS))
+ goto done_invalidate;
for (m = tcs->offset; m < tcs->offset + tcs->num_tcs; m++) {
if (!tcs_is_free(drv, m)) {
- spin_unlock(&tcs->lock);
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto done_invalidate;
}
write_tcs_reg_sync(drv, RSC_DRV_CMD_ENABLE, m, 0);
write_tcs_reg_sync(drv, RSC_DRV_CMD_WAIT_FOR_CMPL, m, 0);
}
bitmap_zero(tcs->slots, MAX_TCS_SLOTS);
- spin_unlock(&tcs->lock);
+done_invalidate:
+ spin_unlock(&drv->lock);
return 0;
}
@@ -349,41 +349,35 @@ static int tcs_write(struct rsc_drv *drv, const struct tcs_request *msg)
{
struct tcs_group *tcs;
int tcs_id;
- unsigned long flags;
int ret;
tcs = get_tcs_for_msg(drv, msg);
if (IS_ERR(tcs))
return PTR_ERR(tcs);
- spin_lock_irqsave(&tcs->lock, flags);
spin_lock(&drv->lock);
/*
* The h/w does not like if we send a request to the same address,
* when one is already in-flight or being processed.
*/
ret = check_for_req_inflight(drv, tcs, msg);
- if (ret) {
- spin_unlock(&drv->lock);
+ if (ret)
goto done_write;
- }
tcs_id = find_free_tcs(tcs);
if (tcs_id < 0) {
ret = tcs_id;
- spin_unlock(&drv->lock);
goto done_write;
}
tcs->req[tcs_id - tcs->offset] = msg;
set_bit(tcs_id, drv->tcs_in_use);
- spin_unlock(&drv->lock);
__tcs_buffer_write(drv, tcs_id, 0, msg);
__tcs_trigger(drv, tcs_id);
done_write:
- spin_unlock_irqrestore(&tcs->lock, flags);
+ spin_unlock(&drv->lock);
return ret;
}
@@ -481,19 +475,18 @@ static int tcs_ctrl_write(struct rsc_drv *drv, const struct tcs_request *msg)
{
struct tcs_group *tcs;
int tcs_id = 0, cmd_id = 0;
- unsigned long flags;
int ret;
tcs = get_tcs_for_msg(drv, msg);
if (IS_ERR(tcs))
return PTR_ERR(tcs);
- spin_lock_irqsave(&tcs->lock, flags);
+ spin_lock(&drv->lock);
/* find the TCS id and the command in the TCS to write to */
ret = find_slots(tcs, msg, &tcs_id, &cmd_id);
if (!ret)
__tcs_buffer_write(drv, tcs_id, cmd_id, msg);
- spin_unlock_irqrestore(&tcs->lock, flags);
+ spin_unlock(&drv->lock);
return ret;
}
@@ -584,7 +577,6 @@ static int rpmh_probe_tcs_config(struct platform_device *pdev,
tcs->type = tcs_cfg[i].type;
tcs->num_tcs = tcs_cfg[i].n;
tcs->ncpt = ncpt;
- spin_lock_init(&tcs->lock);
if (!tcs->num_tcs || tcs->type == CONTROL_TCS)
continue;
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
Since drv->tcs_in_use is updated when the DRV_STATUS is updated, we
could simply use the former to determine if the TCS is idle or not.
Therefore, remove redundant TCS register read.
Signed-off-by: Lina Iyer <[email protected]>
---
drivers/soc/qcom/rpmh-rsc.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
index add5e84751c9..b04cd2d2910c 100644
--- a/drivers/soc/qcom/rpmh-rsc.c
+++ b/drivers/soc/qcom/rpmh-rsc.c
@@ -93,8 +93,7 @@ static void write_tcs_reg_sync(struct rsc_drv *drv, int reg, int tcs_id,
static bool tcs_is_free(struct rsc_drv *drv, int tcs_id)
{
- return !test_bit(tcs_id, drv->tcs_in_use) &&
- read_tcs_reg(drv, RSC_DRV_STATUS, tcs_id, 0);
+ return !test_bit(tcs_id, drv->tcs_in_use);
}
static struct tcs_group *get_tcs_of_type(struct rsc_drv *drv, int type)
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
From: "Raju P.L.S.S.S.N" <[email protected]>
Switch over from using _irqsave/_irqrestore variants since we no longer
race with a lock from the interrupt handler. While we are at it, rename
the cache_lock to just lock to allow use of the lock to synchronize
controller access.
Signed-off-by: Raju P.L.S.S.S.N <[email protected]>
Signed-off-by: Lina Iyer <[email protected]>
---
drivers/soc/qcom/rpmh-internal.h | 4 ++--
drivers/soc/qcom/rpmh-rsc.c | 2 +-
drivers/soc/qcom/rpmh.c | 21 ++++++++-------------
3 files changed, 11 insertions(+), 16 deletions(-)
diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
index 969d5030860e..93d59db435bb 100644
--- a/drivers/soc/qcom/rpmh-internal.h
+++ b/drivers/soc/qcom/rpmh-internal.h
@@ -67,13 +67,13 @@ struct rpmh_request {
* struct rpmh_ctrlr: our representation of the controller
*
* @cache: the list of cached requests
- * @cache_lock: synchronize access to the cache data
+ * @lock: synchronize access to the controller data
* @dirty: was the cache updated since flush
* @batch_cache: Cache sleep and wake requests sent as batch
*/
struct rpmh_ctrlr {
struct list_head cache;
- spinlock_t cache_lock;
+ spinlock_t lock;
bool dirty;
struct list_head batch_cache;
};
diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
index 694ba881624e..add5e84751c9 100644
--- a/drivers/soc/qcom/rpmh-rsc.c
+++ b/drivers/soc/qcom/rpmh-rsc.c
@@ -656,7 +656,7 @@ static int rpmh_rsc_probe(struct platform_device *pdev)
/* Enable the active TCS to send requests immediately */
write_tcs_reg(drv, RSC_DRV_IRQ_ENABLE, 0, drv->tcs[ACTIVE_TCS].mask);
- spin_lock_init(&drv->client.cache_lock);
+ spin_lock_init(&drv->client.lock);
INIT_LIST_HEAD(&drv->client.cache);
INIT_LIST_HEAD(&drv->client.batch_cache);
diff --git a/drivers/soc/qcom/rpmh.c b/drivers/soc/qcom/rpmh.c
index 035091fd44b8..d6fb254a4b57 100644
--- a/drivers/soc/qcom/rpmh.c
+++ b/drivers/soc/qcom/rpmh.c
@@ -118,9 +118,8 @@ static struct cache_req *cache_rpm_request(struct rpmh_ctrlr *ctrlr,
struct tcs_cmd *cmd)
{
struct cache_req *req;
- unsigned long flags;
- spin_lock_irqsave(&ctrlr->cache_lock, flags);
+ spin_lock(&ctrlr->lock);
req = __find_req(ctrlr, cmd->addr);
if (req)
goto existing;
@@ -154,7 +153,7 @@ static struct cache_req *cache_rpm_request(struct rpmh_ctrlr *ctrlr,
ctrlr->dirty = true;
unlock:
- spin_unlock_irqrestore(&ctrlr->cache_lock, flags);
+ spin_unlock(&ctrlr->lock);
return req;
}
@@ -283,23 +282,20 @@ EXPORT_SYMBOL(rpmh_write);
static void cache_batch(struct rpmh_ctrlr *ctrlr, struct batch_cache_req *req)
{
- unsigned long flags;
-
- spin_lock_irqsave(&ctrlr->cache_lock, flags);
+ spin_lock(&ctrlr->lock);
list_add_tail(&req->list, &ctrlr->batch_cache);
- spin_unlock_irqrestore(&ctrlr->cache_lock, flags);
+ spin_unlock(&ctrlr->lock);
}
static int flush_batch(struct rpmh_ctrlr *ctrlr)
{
struct batch_cache_req *req;
const struct rpmh_request *rpm_msg;
- unsigned long flags;
int ret = 0;
int i;
/* Send Sleep/Wake requests to the controller, expect no response */
- spin_lock_irqsave(&ctrlr->cache_lock, flags);
+ spin_lock(&ctrlr->lock);
list_for_each_entry(req, &ctrlr->batch_cache, list) {
for (i = 0; i < req->count; i++) {
rpm_msg = req->rpm_msgs + i;
@@ -309,7 +305,7 @@ static int flush_batch(struct rpmh_ctrlr *ctrlr)
break;
}
}
- spin_unlock_irqrestore(&ctrlr->cache_lock, flags);
+ spin_unlock(&ctrlr->lock);
return ret;
}
@@ -317,13 +313,12 @@ static int flush_batch(struct rpmh_ctrlr *ctrlr)
static void invalidate_batch(struct rpmh_ctrlr *ctrlr)
{
struct batch_cache_req *req, *tmp;
- unsigned long flags;
- spin_lock_irqsave(&ctrlr->cache_lock, flags);
+ spin_lock(&ctrlr->lock);
list_for_each_entry_safe(req, tmp, &ctrlr->batch_cache, list)
kfree(req);
INIT_LIST_HEAD(&ctrlr->batch_cache);
- spin_unlock_irqrestore(&ctrlr->cache_lock, flags);
+ spin_unlock(&ctrlr->lock);
}
/**
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
Avoid locking in the interrupt context to improve latency. Since we
don't lock in the interrupt context, it is possible that we could now
race on the DRV_CONTROL register, which is written with the enable bit and
cleared by the interrupt handler. For fire-n-forget requests, the
interrupt may be raised as soon as the TCS is triggered and the IRQ
handler may clear the enable bit before the DRV_CONTROL is read back.
Use the non-sync variant when enabling the TCS register to avoid reading
back a value that may have been cleared because the interrupt handler ran
immediately after triggering the TCS.
Signed-off-by: Lina Iyer <[email protected]>
---
drivers/soc/qcom/rpmh-rsc.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
index 5ede8d6de3ad..694ba881624e 100644
--- a/drivers/soc/qcom/rpmh-rsc.c
+++ b/drivers/soc/qcom/rpmh-rsc.c
@@ -242,9 +242,7 @@ static irqreturn_t tcs_tx_done(int irq, void *p)
write_tcs_reg(drv, RSC_DRV_CMD_ENABLE, i, 0);
write_tcs_reg(drv, RSC_DRV_CMD_WAIT_FOR_CMPL, i, 0);
write_tcs_reg(drv, RSC_DRV_IRQ_CLEAR, 0, BIT(i));
- spin_lock(&drv->lock);
clear_bit(i, drv->tcs_in_use);
- spin_unlock(&drv->lock);
if (req)
rpmh_tx_done(req, err);
}
@@ -304,7 +302,7 @@ static void __tcs_trigger(struct rsc_drv *drv, int tcs_id)
enable = TCS_AMC_MODE_ENABLE;
write_tcs_reg_sync(drv, RSC_DRV_CONTROL, tcs_id, enable);
enable |= TCS_AMC_MODE_TRIGGER;
- write_tcs_reg_sync(drv, RSC_DRV_CONTROL, tcs_id, enable);
+ write_tcs_reg(drv, RSC_DRV_CONTROL, tcs_id, enable);
}
static int check_for_req_inflight(struct rsc_drv *drv, struct tcs_group *tcs,
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
Quoting Lina Iyer (2019-07-22 14:53:37)
> From: "Raju P.L.S.S.S.N" <[email protected]>
>
> The tcs->lock was introduced to serialize access with in TCS group. But,
> drv->lock is still needed to synchronize core aspects of the
> communication. This puts the drv->lock in the critical and high latency
> path of sending a request. drv->lock provides the all necessary
> synchronization. So remove locking around TCS group and simply use the
> drv->lock instead.
This doesn't talk about removing the irq saving and restoring though.
Can you keep irq saving and restoring in this patch and then remove that
in the next patch with reasoning? It probably isn't safe if the lock is
taken in interrupt context anyway.
>
> Signed-off-by: Raju P.L.S.S.S.N <[email protected]>
> [ilina: split patch into multiple files, update commit text]
> Signed-off-by: Lina Iyer <[email protected]>
> diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
> index a7bbbb67991c..969d5030860e 100644
> --- a/drivers/soc/qcom/rpmh-internal.h
> +++ b/drivers/soc/qcom/rpmh-internal.h
> diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
> index e278fc11fe5c..5ede8d6de3ad 100644
> --- a/drivers/soc/qcom/rpmh-rsc.c
> +++ b/drivers/soc/qcom/rpmh-rsc.c
> @@ -106,26 +106,26 @@ static int tcs_invalidate(struct rsc_drv *drv, int type)
> {
> int m;
> struct tcs_group *tcs;
> + int ret = 0;
>
> tcs = get_tcs_of_type(drv, type);
>
> - spin_lock(&tcs->lock);
> - if (bitmap_empty(tcs->slots, MAX_TCS_SLOTS)) {
> - spin_unlock(&tcs->lock);
> - return 0;
> - }
> + spin_lock(&drv->lock);
> + if (bitmap_empty(tcs->slots, MAX_TCS_SLOTS))
> + goto done_invalidate;
>
> for (m = tcs->offset; m < tcs->offset + tcs->num_tcs; m++) {
> if (!tcs_is_free(drv, m)) {
> - spin_unlock(&tcs->lock);
> - return -EAGAIN;
> + ret = -EAGAIN;
> + goto done_invalidate;
> }
> write_tcs_reg_sync(drv, RSC_DRV_CMD_ENABLE, m, 0);
> write_tcs_reg_sync(drv, RSC_DRV_CMD_WAIT_FOR_CMPL, m, 0);
> }
> bitmap_zero(tcs->slots, MAX_TCS_SLOTS);
> - spin_unlock(&tcs->lock);
>
> +done_invalidate:
> + spin_unlock(&drv->lock);
> return 0;
return ret now?
> }
>
> @@ -349,41 +349,35 @@ static int tcs_write(struct rsc_drv *drv, const struct tcs_request *msg)
> {
> struct tcs_group *tcs;
> int tcs_id;
> - unsigned long flags;
> int ret;
>
> tcs = get_tcs_for_msg(drv, msg);
> if (IS_ERR(tcs))
> return PTR_ERR(tcs);
>
> - spin_lock_irqsave(&tcs->lock, flags);
> spin_lock(&drv->lock);
> /*
> * The h/w does not like if we send a request to the same address,
> * when one is already in-flight or being processed.
> */
> ret = check_for_req_inflight(drv, tcs, msg);
> - if (ret) {
> - spin_unlock(&drv->lock);
> + if (ret)
> goto done_write;
> - }
>
> tcs_id = find_free_tcs(tcs);
> if (tcs_id < 0) {
> ret = tcs_id;
> - spin_unlock(&drv->lock);
> goto done_write;
> }
>
> tcs->req[tcs_id - tcs->offset] = msg;
> set_bit(tcs_id, drv->tcs_in_use);
> - spin_unlock(&drv->lock);
>
> __tcs_buffer_write(drv, tcs_id, 0, msg);
> __tcs_trigger(drv, tcs_id);
>
> done_write:
> - spin_unlock_irqrestore(&tcs->lock, flags);
> + spin_unlock(&drv->lock);
> return ret;
> }
>
> @@ -481,19 +475,18 @@ static int tcs_ctrl_write(struct rsc_drv *drv, const struct tcs_request *msg)
> {
> struct tcs_group *tcs;
> int tcs_id = 0, cmd_id = 0;
> - unsigned long flags;
> int ret;
>
> tcs = get_tcs_for_msg(drv, msg);
> if (IS_ERR(tcs))
> return PTR_ERR(tcs);
>
> - spin_lock_irqsave(&tcs->lock, flags);
> + spin_lock(&drv->lock);
> /* find the TCS id and the command in the TCS to write to */
> ret = find_slots(tcs, msg, &tcs_id, &cmd_id);
> if (!ret)
> __tcs_buffer_write(drv, tcs_id, cmd_id, msg);
> - spin_unlock_irqrestore(&tcs->lock, flags);
> + spin_unlock(&drv->lock);
>
These ones, just leave them doing the irq save restore for now?
Quoting Lina Iyer (2019-07-22 14:53:39)
> From: "Raju P.L.S.S.S.N" <[email protected]>
>
> Switch over from using _irqsave/_irqrestore variants since we no longer
> race with a lock from the interrupt handler. While we are at it, rename
> the cache_lock to just lock to allow use of the lock to synchronize
> controller access.
Is there a reason why it can't be a mutex now?
>
> Signed-off-by: Raju P.L.S.S.S.N <[email protected]>
> Signed-off-by: Lina Iyer <[email protected]>
On Tue, Jul 23 2019 at 12:22 -0600, Stephen Boyd wrote:
>Quoting Lina Iyer (2019-07-22 14:53:37)
>> From: "Raju P.L.S.S.S.N" <[email protected]>
>>
>> The tcs->lock was introduced to serialize access with in TCS group. But,
>> drv->lock is still needed to synchronize core aspects of the
>> communication. This puts the drv->lock in the critical and high latency
>> path of sending a request. drv->lock provides the all necessary
>> synchronization. So remove locking around TCS group and simply use the
>> drv->lock instead.
>
>This doesn't talk about removing the irq saving and restoring though.
You mean for drv->lock? It was not an _irqsave/_irqrestore anyway, and
we were only removing the tcs->lock.
>Can you keep irq saving and restoring in this patch and then remove that
>in the next patch with reasoning? It probably isn't safe if the lock is
>taken in interrupt context anyway.
>
Yes, the drv->lock should have been irqsave/irqrestore, but it hasn't
been changed by this patch.
>>
>> Signed-off-by: Raju P.L.S.S.S.N <[email protected]>
>> [ilina: split patch into multiple files, update commit text]
>> Signed-off-by: Lina Iyer <[email protected]>
>
>> diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
>> index a7bbbb67991c..969d5030860e 100644
>> --- a/drivers/soc/qcom/rpmh-internal.h
>> +++ b/drivers/soc/qcom/rpmh-internal.h
>> diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
>> index e278fc11fe5c..5ede8d6de3ad 100644
>> --- a/drivers/soc/qcom/rpmh-rsc.c
>> +++ b/drivers/soc/qcom/rpmh-rsc.c
>> @@ -106,26 +106,26 @@ static int tcs_invalidate(struct rsc_drv *drv, int type)
>> {
>> int m;
>> struct tcs_group *tcs;
>> + int ret = 0;
>>
>> tcs = get_tcs_of_type(drv, type);
>>
>> - spin_lock(&tcs->lock);
>> - if (bitmap_empty(tcs->slots, MAX_TCS_SLOTS)) {
>> - spin_unlock(&tcs->lock);
>> - return 0;
>> - }
>> + spin_lock(&drv->lock);
>> + if (bitmap_empty(tcs->slots, MAX_TCS_SLOTS))
>> + goto done_invalidate;
>>
>> for (m = tcs->offset; m < tcs->offset + tcs->num_tcs; m++) {
>> if (!tcs_is_free(drv, m)) {
>> - spin_unlock(&tcs->lock);
>> - return -EAGAIN;
>> + ret = -EAGAIN;
>> + goto done_invalidate;
>> }
>> write_tcs_reg_sync(drv, RSC_DRV_CMD_ENABLE, m, 0);
>> write_tcs_reg_sync(drv, RSC_DRV_CMD_WAIT_FOR_CMPL, m, 0);
>> }
>> bitmap_zero(tcs->slots, MAX_TCS_SLOTS);
>> - spin_unlock(&tcs->lock);
>>
>> +done_invalidate:
>> + spin_unlock(&drv->lock);
>> return 0;
>
>return ret now?
>
Yes, will do.
>> }
>>
>> @@ -349,41 +349,35 @@ static int tcs_write(struct rsc_drv *drv, const struct tcs_request *msg)
>> {
>> struct tcs_group *tcs;
>> int tcs_id;
>> - unsigned long flags;
>> int ret;
>>
>> tcs = get_tcs_for_msg(drv, msg);
>> if (IS_ERR(tcs))
>> return PTR_ERR(tcs);
>>
>> - spin_lock_irqsave(&tcs->lock, flags);
>> spin_lock(&drv->lock);
>> /*
>> * The h/w does not like if we send a request to the same address,
>> * when one is already in-flight or being processed.
>> */
>> ret = check_for_req_inflight(drv, tcs, msg);
>> - if (ret) {
>> - spin_unlock(&drv->lock);
>> + if (ret)
>> goto done_write;
>> - }
>>
>> tcs_id = find_free_tcs(tcs);
>> if (tcs_id < 0) {
>> ret = tcs_id;
>> - spin_unlock(&drv->lock);
>> goto done_write;
>> }
>>
>> tcs->req[tcs_id - tcs->offset] = msg;
>> set_bit(tcs_id, drv->tcs_in_use);
>> - spin_unlock(&drv->lock);
>>
>> __tcs_buffer_write(drv, tcs_id, 0, msg);
>> __tcs_trigger(drv, tcs_id);
>>
>> done_write:
>> - spin_unlock_irqrestore(&tcs->lock, flags);
>> + spin_unlock(&drv->lock);
>> return ret;
>> }
>>
>> @@ -481,19 +475,18 @@ static int tcs_ctrl_write(struct rsc_drv *drv, const struct tcs_request *msg)
>> {
>> struct tcs_group *tcs;
>> int tcs_id = 0, cmd_id = 0;
>> - unsigned long flags;
>> int ret;
>>
>> tcs = get_tcs_for_msg(drv, msg);
>> if (IS_ERR(tcs))
>> return PTR_ERR(tcs);
>>
>> - spin_lock_irqsave(&tcs->lock, flags);
>> + spin_lock(&drv->lock);
>> /* find the TCS id and the command in the TCS to write to */
>> ret = find_slots(tcs, msg, &tcs_id, &cmd_id);
>> if (!ret)
>> __tcs_buffer_write(drv, tcs_id, cmd_id, msg);
>> - spin_unlock_irqrestore(&tcs->lock, flags);
>> + spin_unlock(&drv->lock);
>>
>
>These ones, just leave them doing the irq save restore for now?
>
drv->lock ??
--Lina
Quoting Lina Iyer (2019-07-22 14:53:38)
> Avoid locking in the interrupt context to improve latency. Since we
> don't lock in the interrupt context, it is possible that we now could
> race with the DRV_CONTROL register that writes the enable register and
> cleared by the interrupt handler. For fire-n-forget requests, the
> interrupt may be raised as soon as the TCS is triggered and the IRQ
> handler may clear the enable bit before the DRV_CONTROL is read back.
>
> Use the non-sync variant when enabling the TCS register to avoid reading
> back a value that may been cleared because the interrupt handler ran
> immediately after triggering the TCS.
>
> Signed-off-by: Lina Iyer <[email protected]>
> ---
I have to read this patch carefully. The commit text isn't convincing me
that it is actually safe to make this change. It mostly talks about the
performance improvements and how we need to fix __tcs_trigger(), which
is good, but I was hoping to be convinced that not grabbing the lock
here is safe.
How do we ensure that drv->tcs_in_use is cleared before we call
tcs_write() and try to look for a free bit? Isn't it possible that we'll
get into a situation where the bitmap is all used up but the hardware
has just received an interrupt and is going to clear out a bit and then
an rpmh write fails with -EBUSY?
> drivers/soc/qcom/rpmh-rsc.c | 4 +---
> 1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
> index 5ede8d6de3ad..694ba881624e 100644
> --- a/drivers/soc/qcom/rpmh-rsc.c
> +++ b/drivers/soc/qcom/rpmh-rsc.c
> @@ -242,9 +242,7 @@ static irqreturn_t tcs_tx_done(int irq, void *p)
> write_tcs_reg(drv, RSC_DRV_CMD_ENABLE, i, 0);
> write_tcs_reg(drv, RSC_DRV_CMD_WAIT_FOR_CMPL, i, 0);
> write_tcs_reg(drv, RSC_DRV_IRQ_CLEAR, 0, BIT(i));
> - spin_lock(&drv->lock);
> clear_bit(i, drv->tcs_in_use);
> - spin_unlock(&drv->lock);
> if (req)
> rpmh_tx_done(req, err);
> }
> @@ -304,7 +302,7 @@ static void __tcs_trigger(struct rsc_drv *drv, int tcs_id)
> enable = TCS_AMC_MODE_ENABLE;
> write_tcs_reg_sync(drv, RSC_DRV_CONTROL, tcs_id, enable);
> enable |= TCS_AMC_MODE_TRIGGER;
> - write_tcs_reg_sync(drv, RSC_DRV_CONTROL, tcs_id, enable);
> + write_tcs_reg(drv, RSC_DRV_CONTROL, tcs_id, enable);
> }
>
> static int check_for_req_inflight(struct rsc_drv *drv, struct tcs_group *tcs,
Quoting Lina Iyer (2019-07-23 12:21:59)
> On Tue, Jul 23 2019 at 12:22 -0600, Stephen Boyd wrote:
> >Quoting Lina Iyer (2019-07-22 14:53:37)
> >> From: "Raju P.L.S.S.S.N" <[email protected]>
> >>
> >> The tcs->lock was introduced to serialize access with in TCS group. But,
> >> drv->lock is still needed to synchronize core aspects of the
> >> communication. This puts the drv->lock in the critical and high latency
> >> path of sending a request. drv->lock provides the all necessary
> >> synchronization. So remove locking around TCS group and simply use the
> >> drv->lock instead.
> >
> >This doesn't talk about removing the irq saving and restoring though.
> You mean for drv->lock? It was not an _irqsave/_irqrestore anyways and
> we were only removing the tcs->lock.
Yes, drv->lock wasn't an irqsave/restore variant because it was a
spinlock inside an obviously already irqsaved region of code: the
tcs->lock was taken outside the drv->lock, and that was saving the irq
flags.
>
> >Can you keep irq saving and restoring in this patch and then remove that
> >in the next patch with reasoning? It probably isn't safe if the lock is
> >taken in interrupt context anyway.
> >
> Yes, the drv->lock should have been irqsave/irqrestore, but it hasn't
> been changed by this patch.
It needs to be changed to maintain the irqsaving/restoring of the code.
> >> @@ -349,41 +349,35 @@ static int tcs_write(struct rsc_drv *drv, const struct tcs_request *msg)
> >> {
> >> struct tcs_group *tcs;
> >> int tcs_id;
> >> - unsigned long flags;
> >> int ret;
> >>
> >> tcs = get_tcs_for_msg(drv, msg);
> >> if (IS_ERR(tcs))
> >> return PTR_ERR(tcs);
> >>
> >> - spin_lock_irqsave(&tcs->lock, flags);
> >> spin_lock(&drv->lock);
> >> /*
> >> * The h/w does not like if we send a request to the same address,
> >> * when one is already in-flight or being processed.
> >> */
> >> ret = check_for_req_inflight(drv, tcs, msg);
> >> - if (ret) {
> >> - spin_unlock(&drv->lock);
> >> + if (ret)
> >> goto done_write;
> >> - }
> >>
> >> tcs_id = find_free_tcs(tcs);
> >> if (tcs_id < 0) {
> >> ret = tcs_id;
> >> - spin_unlock(&drv->lock);
> >> goto done_write;
> >> }
> >>
> >> tcs->req[tcs_id - tcs->offset] = msg;
> >> set_bit(tcs_id, drv->tcs_in_use);
> >> - spin_unlock(&drv->lock);
> >>
> >> __tcs_buffer_write(drv, tcs_id, 0, msg);
> >> __tcs_trigger(drv, tcs_id);
> >>
> >> done_write:
> >> - spin_unlock_irqrestore(&tcs->lock, flags);
> >> + spin_unlock(&drv->lock);
> >> return ret;
> >> }
> >>
> >> @@ -481,19 +475,18 @@ static int tcs_ctrl_write(struct rsc_drv *drv, const struct tcs_request *msg)
> >> {
> >> struct tcs_group *tcs;
> >> int tcs_id = 0, cmd_id = 0;
> >> - unsigned long flags;
> >> int ret;
> >>
> >> tcs = get_tcs_for_msg(drv, msg);
> >> if (IS_ERR(tcs))
> >> return PTR_ERR(tcs);
> >>
> >> - spin_lock_irqsave(&tcs->lock, flags);
> >> + spin_lock(&drv->lock);
> >> /* find the TCS id and the command in the TCS to write to */
> >> ret = find_slots(tcs, msg, &tcs_id, &cmd_id);
> >> if (!ret)
> >> __tcs_buffer_write(drv, tcs_id, cmd_id, msg);
> >> - spin_unlock_irqrestore(&tcs->lock, flags);
> >> + spin_unlock(&drv->lock);
> >>
> >
> >These ones, just leave them doing the irq save restore for now?
> >
> drv->lock ??
>
Yes, it should have irq save/restore still.
On Tue, Jul 23 2019 at 14:11 -0600, Stephen Boyd wrote:
>Quoting Lina Iyer (2019-07-22 14:53:38)
>> Avoid locking in the interrupt context to improve latency. Since we
>> don't lock in the interrupt context, it is possible that we now could
>> race with the DRV_CONTROL register that writes the enable register and
>> cleared by the interrupt handler. For fire-n-forget requests, the
>> interrupt may be raised as soon as the TCS is triggered and the IRQ
>> handler may clear the enable bit before the DRV_CONTROL is read back.
>>
>> Use the non-sync variant when enabling the TCS register to avoid reading
>> back a value that may been cleared because the interrupt handler ran
>> immediately after triggering the TCS.
>>
>> Signed-off-by: Lina Iyer <[email protected]>
>> ---
>
>I have to read this patch carefully. The commit text isn't convincing me
>that it is actually safe to make this change. It mostly talks about the
>performance improvements and how we need to fix __tcs_trigger(), which
>is good, but I was hoping to be convinced that not grabbing the lock
>here is safe.
>
>How do we ensure that drv->tcs_in_use is cleared before we call
>tcs_write() and try to look for a free bit? Isn't it possible that we'll
>get into a situation where the bitmap is all used up but the hardware
>has just received an interrupt and is going to clear out a bit and then
>an rpmh write fails with -EBUSY?
>
If we have a situation where there are no free bits available, we retry,
and that is part of the function. Since we have only 2 TCSes available
to write to the hardware and there could be multiple requests coming in,
it is a very common situation. We acquire the drv->lock and, if there
are free TCSes available, mark them busy and send our requests. If there
are none available, we keep retrying.
>> drivers/soc/qcom/rpmh-rsc.c | 4 +---
>> 1 file changed, 1 insertion(+), 3 deletions(-)
>>
>> diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
>> index 5ede8d6de3ad..694ba881624e 100644
>> --- a/drivers/soc/qcom/rpmh-rsc.c
>> +++ b/drivers/soc/qcom/rpmh-rsc.c
>> @@ -242,9 +242,7 @@ static irqreturn_t tcs_tx_done(int irq, void *p)
>> write_tcs_reg(drv, RSC_DRV_CMD_ENABLE, i, 0);
>> write_tcs_reg(drv, RSC_DRV_CMD_WAIT_FOR_CMPL, i, 0);
>> write_tcs_reg(drv, RSC_DRV_IRQ_CLEAR, 0, BIT(i));
>> - spin_lock(&drv->lock);
>> clear_bit(i, drv->tcs_in_use);
>> - spin_unlock(&drv->lock);
>> if (req)
>> rpmh_tx_done(req, err);
>> }
>> @@ -304,7 +302,7 @@ static void __tcs_trigger(struct rsc_drv *drv, int tcs_id)
>> enable = TCS_AMC_MODE_ENABLE;
>> write_tcs_reg_sync(drv, RSC_DRV_CONTROL, tcs_id, enable);
>> enable |= TCS_AMC_MODE_TRIGGER;
>> - write_tcs_reg_sync(drv, RSC_DRV_CONTROL, tcs_id, enable);
>> + write_tcs_reg(drv, RSC_DRV_CONTROL, tcs_id, enable);
>> }
>>
>> static int check_for_req_inflight(struct rsc_drv *drv, struct tcs_group *tcs,
On Tue, Jul 23 2019 at 14:19 -0600, Stephen Boyd wrote:
>Quoting Lina Iyer (2019-07-23 12:21:59)
>> On Tue, Jul 23 2019 at 12:22 -0600, Stephen Boyd wrote:
>> >Quoting Lina Iyer (2019-07-22 14:53:37)
>> >> From: "Raju P.L.S.S.S.N" <[email protected]>
>> >>
>> >> The tcs->lock was introduced to serialize access with in TCS group. But,
>> >> drv->lock is still needed to synchronize core aspects of the
>> >> communication. This puts the drv->lock in the critical and high latency
>> >> path of sending a request. drv->lock provides the all necessary
>> >> synchronization. So remove locking around TCS group and simply use the
>> >> drv->lock instead.
>> >
>> >This doesn't talk about removing the irq saving and restoring though.
>> You mean for drv->lock? It was not an _irqsave/_irqrestore anyways and
>> we were only removing the tcs->lock.
>
>Yes drv->lock wasn't an irqsave/restore variant because it was a
>spinlock inside of an obviously already irqsaved region of code because
>the tcs->lock was outside the drv->lock and that was saving the irq
>flags.
>
Oh, right.
>>
>> >Can you keep irq saving and restoring in this patch and then remove that
>> >in the next patch with reasoning? It probably isn't safe if the lock is
>> >taken in interrupt context anyway.
>> >
>> Yes, the drv->lock should have been irqsave/irqrestore, but it hasn't
>> been changed by this patch.
>
>It needs to be changed to maintain the irqsaving/restoring of the code.
>
Maybe I should club this with the following patch, instead of adding
irqsave and restore to drv->lock and then removing them again in the
following patch.
>> >> @@ -349,41 +349,35 @@ static int tcs_write(struct rsc_drv *drv, const struct tcs_request *msg)
>> >> {
>> >> struct tcs_group *tcs;
>> >> int tcs_id;
>> >> - unsigned long flags;
>> >> int ret;
>> >>
>> >> tcs = get_tcs_for_msg(drv, msg);
>> >> if (IS_ERR(tcs))
>> >> return PTR_ERR(tcs);
>> >>
>> >> - spin_lock_irqsave(&tcs->lock, flags);
>> >> spin_lock(&drv->lock);
>> >> /*
>> >> * The h/w does not like if we send a request to the same address,
>> >> * when one is already in-flight or being processed.
>> >> */
>> >> ret = check_for_req_inflight(drv, tcs, msg);
>> >> - if (ret) {
>> >> - spin_unlock(&drv->lock);
>> >> + if (ret)
>> >> goto done_write;
>> >> - }
>> >>
>> >> tcs_id = find_free_tcs(tcs);
>> >> if (tcs_id < 0) {
>> >> ret = tcs_id;
>> >> - spin_unlock(&drv->lock);
>> >> goto done_write;
>> >> }
>> >>
>> >> tcs->req[tcs_id - tcs->offset] = msg;
>> >> set_bit(tcs_id, drv->tcs_in_use);
>> >> - spin_unlock(&drv->lock);
>> >>
>> >> __tcs_buffer_write(drv, tcs_id, 0, msg);
>> >> __tcs_trigger(drv, tcs_id);
>> >>
>> >> done_write:
>> >> - spin_unlock_irqrestore(&tcs->lock, flags);
>> >> + spin_unlock(&drv->lock);
>> >> return ret;
>> >> }
>> >>
>> >> @@ -481,19 +475,18 @@ static int tcs_ctrl_write(struct rsc_drv *drv, const struct tcs_request *msg)
>> >> {
>> >> struct tcs_group *tcs;
>> >> int tcs_id = 0, cmd_id = 0;
>> >> - unsigned long flags;
>> >> int ret;
>> >>
>> >> tcs = get_tcs_for_msg(drv, msg);
>> >> if (IS_ERR(tcs))
>> >> return PTR_ERR(tcs);
>> >>
>> >> - spin_lock_irqsave(&tcs->lock, flags);
>> >> + spin_lock(&drv->lock);
>> >> /* find the TCS id and the command in the TCS to write to */
>> >> ret = find_slots(tcs, msg, &tcs_id, &cmd_id);
>> >> if (!ret)
>> >> __tcs_buffer_write(drv, tcs_id, cmd_id, msg);
>> >> - spin_unlock_irqrestore(&tcs->lock, flags);
>> >> + spin_unlock(&drv->lock);
>> >>
>> >
>> >These ones, just leave them doing the irq save restore for now?
>> >
>> drv->lock ??
>>
>
>Yes, it should have irq save/restore still.
>
On Wed, Jul 24 2019 at 12:32 -0600, Stephen Boyd wrote:
>Quoting Lina Iyer (2019-07-24 07:54:52)
>> On Tue, Jul 23 2019 at 14:19 -0600, Stephen Boyd wrote:
>> >Quoting Lina Iyer (2019-07-23 12:21:59)
>> >> On Tue, Jul 23 2019 at 12:22 -0600, Stephen Boyd wrote:
>> >> >Can you keep irq saving and restoring in this patch and then remove that
>> >> >in the next patch with reasoning? It probably isn't safe if the lock is
>> >> >taken in interrupt context anyway.
>> >> >
>> >> Yes, the drv->lock should have been irqsave/irqrestore, but it hasn't
>> >> been changed by this patch.
>> >
>> >It needs to be changed to maintain the irqsaving/restoring of the code.
>> >
>> May be I should club this with the following patch. Instead of adding
>> irqsave and restore to drv->lock and then removing them again in the
>> following patch.
>>
>
>I suspect that gets us back to v1 of this patch series? I'd prefer you
>just keep the save/restore of irqs in this patch and then remove them
>later. Or if the order can be the other way, where we remove grabbing
>the lock in irq context comes first and then consolidate the locks into
>one it might work.
>
Patches 1 and 3 need not be bundled. We can keep them separate to help
understand the change better.
This patch order - #2, #1, #3, #4 would work.
--Lina
Quoting Lina Iyer (2019-07-24 07:52:51)
> On Tue, Jul 23 2019 at 14:11 -0600, Stephen Boyd wrote:
> >Quoting Lina Iyer (2019-07-22 14:53:38)
> >> Avoid locking in the interrupt context to improve latency. Since we
> >> don't lock in the interrupt context, it is possible that we now could
> >> race with the DRV_CONTROL register that writes the enable register and
> >> cleared by the interrupt handler. For fire-n-forget requests, the
> >> interrupt may be raised as soon as the TCS is triggered and the IRQ
> >> handler may clear the enable bit before the DRV_CONTROL is read back.
> >>
> >> Use the non-sync variant when enabling the TCS register to avoid reading
> >> back a value that may have been cleared because the interrupt handler ran
> >> immediately after triggering the TCS.
> >>
> >> Signed-off-by: Lina Iyer <[email protected]>
> >> ---
> >
> >I have to read this patch carefully. The commit text isn't convincing me
> >that it is actually safe to make this change. It mostly talks about the
> >performance improvements and how we need to fix __tcs_trigger(), which
> >is good, but I was hoping to be convinced that not grabbing the lock
> >here is safe.
> >
> >How do we ensure that drv->tcs_in_use is cleared before we call
> >tcs_write() and try to look for a free bit? Isn't it possible that we'll
> >get into a situation where the bitmap is all used up but the hardware
> >has just received an interrupt and is going to clear out a bit and then
> >an rpmh write fails with -EBUSY?
> >
> If we have a situation where there are no available free bits, we retry
> and that is part of the function. Since we have only 2 TCSes available
> to write to the hardware and there could be multiple requests coming in,
> it is a very common situation. We try and acquire the drv->lock and if
> there are free TCS available and if available mark them busy and send
> our requests. If there are none available, we keep retrying.
>
Ok. I wonder if we need some sort of barriers here too, like an
smp_mb__after_atomic()? That way we can make sure that the write to
clear the bit is seen by another CPU that could be spinning forever
waiting for that bit to be cleared? Before this change the spinlock
would be guaranteed to make these barriers for us, but now that doesn't
seem to be the case. I really hope that this whole thing can be changed
to be a mutex though, in which case we can use the bit_wait() API, etc.
to put tasks to sleep while RPMh is processing things.
On Wed, Jul 24 2019 at 13:38 -0600, Stephen Boyd wrote:
>Quoting Lina Iyer (2019-07-24 07:52:51)
>> On Tue, Jul 23 2019 at 14:11 -0600, Stephen Boyd wrote:
>> >Quoting Lina Iyer (2019-07-22 14:53:38)
>> >> Avoid locking in the interrupt context to improve latency. Since we
>> >> don't lock in the interrupt context, it is possible that we now could
>> >> race with the DRV_CONTROL register that writes the enable register and
>> >> cleared by the interrupt handler. For fire-n-forget requests, the
>> >> interrupt may be raised as soon as the TCS is triggered and the IRQ
>> >> handler may clear the enable bit before the DRV_CONTROL is read back.
>> >>
>> >> Use the non-sync variant when enabling the TCS register to avoid reading
>> >> back a value that may been cleared because the interrupt handler ran
>> >> immediately after triggering the TCS.
>> >>
>> >> Signed-off-by: Lina Iyer <[email protected]>
>> >> ---
>> >
>> >I have to read this patch carefully. The commit text isn't convincing me
>> >that it is actually safe to make this change. It mostly talks about the
>> >performance improvements and how we need to fix __tcs_trigger(), which
>> >is good, but I was hoping to be convinced that not grabbing the lock
>> >here is safe.
>> >
>> >How do we ensure that drv->tcs_in_use is cleared before we call
>> >tcs_write() and try to look for a free bit? Isn't it possible that we'll
>> >get into a situation where the bitmap is all used up but the hardware
>> >has just received an interrupt and is going to clear out a bit and then
>> >an rpmh write fails with -EBUSY?
>> >
>> If we have a situation where there are no available free bits, we retry
>> and that is part of the function. Since we have only 2 TCSes available
>> to write to the hardware and there could be multiple requests coming in,
>> it is a very common situation. We try and acquire the drv->lock and if
>> there are free TCS available and if available mark them busy and send
>> our requests. If there are none available, we keep retrying.
>>
>
>Ok. I wonder if we need some sort of barriers here too, like an
>smp_mb__after_atomic()? That way we can make sure that the write to
>clear the bit is seen by another CPU that could be spinning forever
>waiting for that bit to be cleared? Before this change the spinlock
>would be guaranteed to make these barriers for us, but now that doesn't
>seem to be the case. I really hope that this whole thing can be changed
>to be a mutex though, in which case we can use the bit_wait() API, etc.
>to put tasks to sleep while RPMh is processing things.
>
We have drivers that want to send requests in atomic contexts and
therefore mutex locks would not work.
--Lina
Quoting Lina Iyer (2019-07-24 07:54:52)
> On Tue, Jul 23 2019 at 14:19 -0600, Stephen Boyd wrote:
> >Quoting Lina Iyer (2019-07-23 12:21:59)
> >> On Tue, Jul 23 2019 at 12:22 -0600, Stephen Boyd wrote:
> >> >Can you keep irq saving and restoring in this patch and then remove that
> >> >in the next patch with reasoning? It probably isn't safe if the lock is
> >> >taken in interrupt context anyway.
> >> >
> >> Yes, the drv->lock should have been irqsave/irqrestore, but it hasn't
> >> been changed by this patch.
> >
> >It needs to be changed to maintain the irqsaving/restoring of the code.
> >
> May be I should club this with the following patch. Instead of adding
> irqsave and restore to drv->lock and then removing them again in the
> following patch.
>
I suspect that gets us back to v1 of this patch series? I'd prefer you
just keep the save/restore of irqs in this patch and then remove them
later. Or if the order can be the other way, where removing the lock
grab in irq context comes first and the locks are then consolidated into
one, it might work.
Hi,
On Wed, Jul 24, 2019 at 1:36 PM Lina Iyer <[email protected]> wrote:
>
> On Wed, Jul 24 2019 at 13:38 -0600, Stephen Boyd wrote:
> >Quoting Lina Iyer (2019-07-24 07:52:51)
> >> On Tue, Jul 23 2019 at 14:11 -0600, Stephen Boyd wrote:
> >> >Quoting Lina Iyer (2019-07-22 14:53:38)
> >> >> Avoid locking in the interrupt context to improve latency. Since we
> >> >> don't lock in the interrupt context, it is possible that we now could
> >> >> race with the DRV_CONTROL register that writes the enable register and
> >> >> cleared by the interrupt handler. For fire-n-forget requests, the
> >> >> interrupt may be raised as soon as the TCS is triggered and the IRQ
> >> >> handler may clear the enable bit before the DRV_CONTROL is read back.
> >> >>
> >> >> Use the non-sync variant when enabling the TCS register to avoid reading
> >> >> back a value that may been cleared because the interrupt handler ran
> >> >> immediately after triggering the TCS.
> >> >>
> >> >> Signed-off-by: Lina Iyer <[email protected]>
> >> >> ---
> >> >
> >> >I have to read this patch carefully. The commit text isn't convincing me
> >> >that it is actually safe to make this change. It mostly talks about the
> >> >performance improvements and how we need to fix __tcs_trigger(), which
> >> >is good, but I was hoping to be convinced that not grabbing the lock
> >> >here is safe.
> >> >
> >> >How do we ensure that drv->tcs_in_use is cleared before we call
> >> >tcs_write() and try to look for a free bit? Isn't it possible that we'll
> >> >get into a situation where the bitmap is all used up but the hardware
> >> >has just received an interrupt and is going to clear out a bit and then
> >> >an rpmh write fails with -EBUSY?
> >> >
> >> If we have a situation where there are no available free bits, we retry
> >> and that is part of the function. Since we have only 2 TCSes avaialble
> >> to write to the hardware and there could be multiple requests coming in,
> >> it is a very common situation. We try and acquire the drv->lock and if
> >> there are free TCS available and if available mark them busy and send
> >> our requests. If there are none available, we keep retrying.
> >>
> >
> >Ok. I wonder if we need some sort of barriers here too, like an
> >smp_mb__after_atomic()? That way we can make sure that the write to
> >clear the bit is seen by another CPU that could be spinning forever
> >waiting for that bit to be cleared? Before this change the spinlock
> >would be guaranteed to make these barriers for us, but now that doesn't
> >seem to be the case. I really hope that this whole thing can be changed
> >to be a mutex though, in which case we can use the bit_wait() API, etc.
> >to put tasks to sleep while RPMh is processing things.
> >
> We have drivers that want to send requests in atomic contexts and
> therefore mutex locks would not work.
Jumping in without reading all the context, but I saw this fly by and
it seemed odd. If I'm way off base then please ignore...
Can you give more details? Why are these drivers in atomic contexts?
If they are in atomic contexts because they are running in the context
of an interrupt then your next patch in the series isn't so correct.
Also: when people submit requests in atomic context are they always
submitting an asynchronous request? In that case we could
(presumably) just use a spinlock to protect the queue of async
requests and a mutex for everything else?
-Doug
On Wed, Jul 24 2019 at 17:28 -0600, Doug Anderson wrote:
>Hi,
>
>On Wed, Jul 24, 2019 at 1:36 PM Lina Iyer <[email protected]> wrote:
>>
>> On Wed, Jul 24 2019 at 13:38 -0600, Stephen Boyd wrote:
>> >Quoting Lina Iyer (2019-07-24 07:52:51)
>> >> On Tue, Jul 23 2019 at 14:11 -0600, Stephen Boyd wrote:
>> >> >Quoting Lina Iyer (2019-07-22 14:53:38)
>> >> >> Avoid locking in the interrupt context to improve latency. Since we
>> >> >> don't lock in the interrupt context, it is possible that we now could
>> >> >> race with the DRV_CONTROL register that writes the enable register and
>> >> >> cleared by the interrupt handler. For fire-n-forget requests, the
>> >> >> interrupt may be raised as soon as the TCS is triggered and the IRQ
>> >> >> handler may clear the enable bit before the DRV_CONTROL is read back.
>> >> >>
>> >> >> Use the non-sync variant when enabling the TCS register to avoid reading
>> >> >> back a value that may been cleared because the interrupt handler ran
>> >> >> immediately after triggering the TCS.
>> >> >>
>> >> >> Signed-off-by: Lina Iyer <[email protected]>
>> >> >> ---
>> >> >
>> >> >I have to read this patch carefully. The commit text isn't convincing me
>> >> >that it is actually safe to make this change. It mostly talks about the
>> >> >performance improvements and how we need to fix __tcs_trigger(), which
>> >> >is good, but I was hoping to be convinced that not grabbing the lock
>> >> >here is safe.
>> >> >
>> >> >How do we ensure that drv->tcs_in_use is cleared before we call
>> >> >tcs_write() and try to look for a free bit? Isn't it possible that we'll
>> >> >get into a situation where the bitmap is all used up but the hardware
>> >> >has just received an interrupt and is going to clear out a bit and then
>> >> >an rpmh write fails with -EBUSY?
>> >> >
>> >> If we have a situation where there are no available free bits, we retry
>> >> and that is part of the function. Since we have only 2 TCSes avaialble
>> >> to write to the hardware and there could be multiple requests coming in,
>> >> it is a very common situation. We try and acquire the drv->lock and if
>> >> there are free TCS available and if available mark them busy and send
>> >> our requests. If there are none available, we keep retrying.
>> >>
>> >
>> >Ok. I wonder if we need some sort of barriers here too, like an
>> >smp_mb__after_atomic()? That way we can make sure that the write to
>> >clear the bit is seen by another CPU that could be spinning forever
>> >waiting for that bit to be cleared? Before this change the spinlock
>> >would be guaranteed to make these barriers for us, but now that doesn't
>> >seem to be the case. I really hope that this whole thing can be changed
>> >to be a mutex though, in which case we can use the bit_wait() API, etc.
>> >to put tasks to sleep while RPMh is processing things.
>> >
>> We have drivers that want to send requests in atomic contexts and
>> therefore mutex locks would not work.
>
>Jumping in without reading all the context, but I saw this fly by and
>it seemed odd. If I'm way off base then please ignore...
>
>Can you give more details? Why are these drivers in atomic contexts?
>If they are in atomic contexts because they are running in the context
>of an interrupt then your next patch in the series isn't so correct.
>
>Also: when people submit requests in atomic context are they always
>submitting an asynchronous request? In that case we could
>(presumably) just use a spinlock to protect the queue of async
>requests and a mutex for everything else?
Yes, drivers only make async requests in interrupt contexts. They cannot
use the sync variants. The async and sync variants are streamlined into
the same code path. Hence the use of spinlocks instead of mutexes
through the critical path.
--Lina
Hi,
On Thu, Jul 25, 2019 at 8:18 AM Lina Iyer <[email protected]> wrote:
>
> On Wed, Jul 24 2019 at 17:28 -0600, Doug Anderson wrote:
> >Hi,
> >
> >On Wed, Jul 24, 2019 at 1:36 PM Lina Iyer <[email protected]> wrote:
> >>
> >> On Wed, Jul 24 2019 at 13:38 -0600, Stephen Boyd wrote:
> >> >Quoting Lina Iyer (2019-07-24 07:52:51)
> >> >> On Tue, Jul 23 2019 at 14:11 -0600, Stephen Boyd wrote:
> >> >> >Quoting Lina Iyer (2019-07-22 14:53:38)
> >> >> >> Avoid locking in the interrupt context to improve latency. Since we
> >> >> >> don't lock in the interrupt context, it is possible that we now could
> >> >> >> race with the DRV_CONTROL register that writes the enable register and
> >> >> >> cleared by the interrupt handler. For fire-n-forget requests, the
> >> >> >> interrupt may be raised as soon as the TCS is triggered and the IRQ
> >> >> >> handler may clear the enable bit before the DRV_CONTROL is read back.
> >> >> >>
> >> >> >> Use the non-sync variant when enabling the TCS register to avoid reading
> >> >> >> back a value that may been cleared because the interrupt handler ran
> >> >> >> immediately after triggering the TCS.
> >> >> >>
> >> >> >> Signed-off-by: Lina Iyer <[email protected]>
> >> >> >> ---
> >> >> >
> >> >> >I have to read this patch carefully. The commit text isn't convincing me
> >> >> >that it is actually safe to make this change. It mostly talks about the
> >> >> >performance improvements and how we need to fix __tcs_trigger(), which
> >> >> >is good, but I was hoping to be convinced that not grabbing the lock
> >> >> >here is safe.
> >> >> >
> >> >> >How do we ensure that drv->tcs_in_use is cleared before we call
> >> >> >tcs_write() and try to look for a free bit? Isn't it possible that we'll
> >> >> >get into a situation where the bitmap is all used up but the hardware
> >> >> >has just received an interrupt and is going to clear out a bit and then
> >> >> >an rpmh write fails with -EBUSY?
> >> >> >
> >> >> If we have a situation where there are no available free bits, we retry
> >> >> and that is part of the function. Since we have only 2 TCSes avaialble
> >> >> to write to the hardware and there could be multiple requests coming in,
> >> >> it is a very common situation. We try and acquire the drv->lock and if
> >> >> there are free TCS available and if available mark them busy and send
> >> >> our requests. If there are none available, we keep retrying.
> >> >>
> >> >
> >> >Ok. I wonder if we need some sort of barriers here too, like an
> >> >smp_mb__after_atomic()? That way we can make sure that the write to
> >> >clear the bit is seen by another CPU that could be spinning forever
> >> >waiting for that bit to be cleared? Before this change the spinlock
> >> >would be guaranteed to make these barriers for us, but now that doesn't
> >> >seem to be the case. I really hope that this whole thing can be changed
> >> >to be a mutex though, in which case we can use the bit_wait() API, etc.
> >> >to put tasks to sleep while RPMh is processing things.
> >> >
> >> We have drivers that want to send requests in atomic contexts and
> >> therefore mutex locks would not work.
> >
> >Jumping in without reading all the context, but I saw this fly by and
> >it seemed odd. If I'm way off base then please ignore...
> >
> >Can you give more details? Why are these drivers in atomic contexts?
> >If they are in atomic contexts because they are running in the context
> >of an interrupt then your next patch in the series isn't so correct.
> >
> >Also: when people submit requests in atomic context are they always
> >submitting an asynchronous request? In that case we could
> >(presumably) just use a spinlock to protect the queue of async
> >requests and a mutex for everything else?
> Yes, drivers only make async requests in interrupt contexts.
So correct me if I'm off base, but you're saying that drivers make
requests in interrupt contexts even after your whole series and that's
why you're using spinlocks instead of mutexes. ...but then in patch
#3 in your series you say:
> Switch over from using _irqsave/_irqrestore variants since we no longer
> race with a lock from the interrupt handler.
Those seem like contradictions. What happens if someone is holding
the lock, then an interrupt fires, then the interrupt routine wants to
do an async request. Boom, right?
> They cannot
> use the sync variants. The async and sync variants are streamlined into
> the same code path. Hence the use of spinlocks instead of mutexes
> through the critical path.
I will perhaps defer to Stephen who was the one thinking that a mutex
would be a big win here. ...but if a mutex truly is a big win then it
doesn't seem like it'd be that hard to have a linked list (protected
by a spinlock) and then some type of async worker that:
1. Grab the spinlock, pop one element off the linked list, release the spinlock
2. Grab the mutex, send the one element, release the mutex
3. Go back to step #1.
This will keep the spinlock held for as little time as possible.
-Doug
On Thu, Jul 25 2019 at 09:44 -0600, Doug Anderson wrote:
>Hi,
>
>On Thu, Jul 25, 2019 at 8:18 AM Lina Iyer <[email protected]> wrote:
>>
>> On Wed, Jul 24 2019 at 17:28 -0600, Doug Anderson wrote:
>> >Hi,
>> >
>> >On Wed, Jul 24, 2019 at 1:36 PM Lina Iyer <[email protected]> wrote:
>> >>
>> >> On Wed, Jul 24 2019 at 13:38 -0600, Stephen Boyd wrote:
>> >> >Quoting Lina Iyer (2019-07-24 07:52:51)
>> >> >> On Tue, Jul 23 2019 at 14:11 -0600, Stephen Boyd wrote:
>> >> >> >Quoting Lina Iyer (2019-07-22 14:53:38)
>> >> >> >> Avoid locking in the interrupt context to improve latency. Since we
>> >> >> >> don't lock in the interrupt context, it is possible that we now could
>> >> >> >> race with the DRV_CONTROL register that writes the enable register and
>> >> >> >> cleared by the interrupt handler. For fire-n-forget requests, the
>> >> >> >> interrupt may be raised as soon as the TCS is triggered and the IRQ
>> >> >> >> handler may clear the enable bit before the DRV_CONTROL is read back.
>> >> >> >>
>> >> >> >> Use the non-sync variant when enabling the TCS register to avoid reading
>> >> >> >> back a value that may been cleared because the interrupt handler ran
>> >> >> >> immediately after triggering the TCS.
>> >> >> >>
>> >> >> >> Signed-off-by: Lina Iyer <[email protected]>
>> >> >> >> ---
>> >> >> >
>> >> >> >I have to read this patch carefully. The commit text isn't convincing me
>> >> >> >that it is actually safe to make this change. It mostly talks about the
>> >> >> >performance improvements and how we need to fix __tcs_trigger(), which
>> >> >> >is good, but I was hoping to be convinced that not grabbing the lock
>> >> >> >here is safe.
>> >> >> >
>> >> >> >How do we ensure that drv->tcs_in_use is cleared before we call
>> >> >> >tcs_write() and try to look for a free bit? Isn't it possible that we'll
>> >> >> >get into a situation where the bitmap is all used up but the hardware
>> >> >> >has just received an interrupt and is going to clear out a bit and then
>> >> >> >an rpmh write fails with -EBUSY?
>> >> >> >
>> >> >> If we have a situation where there are no available free bits, we retry
>> >> >> and that is part of the function. Since we have only 2 TCSes avaialble
>> >> >> to write to the hardware and there could be multiple requests coming in,
>> >> >> it is a very common situation. We try and acquire the drv->lock and if
>> >> >> there are free TCS available and if available mark them busy and send
>> >> >> our requests. If there are none available, we keep retrying.
>> >> >>
>> >> >
>> >> >Ok. I wonder if we need some sort of barriers here too, like an
>> >> >smp_mb__after_atomic()? That way we can make sure that the write to
>> >> >clear the bit is seen by another CPU that could be spinning forever
>> >> >waiting for that bit to be cleared? Before this change the spinlock
>> >> >would be guaranteed to make these barriers for us, but now that doesn't
>> >> >seem to be the case. I really hope that this whole thing can be changed
>> >> >to be a mutex though, in which case we can use the bit_wait() API, etc.
>> >> >to put tasks to sleep while RPMh is processing things.
>> >> >
>> >> We have drivers that want to send requests in atomic contexts and
>> >> therefore mutex locks would not work.
>> >
>> >Jumping in without reading all the context, but I saw this fly by and
>> >it seemed odd. If I'm way off base then please ignore...
>> >
>> >Can you give more details? Why are these drivers in atomic contexts?
>> >If they are in atomic contexts because they are running in the context
>> >of an interrupt then your next patch in the series isn't so correct.
>> >
>> >Also: when people submit requests in atomic context are they always
>> >submitting an asynchronous request? In that case we could
>> >(presumably) just use a spinlock to protect the queue of async
>> >requests and a mutex for everything else?
>> Yes, drivers only make async requests in interrupt contexts.
>
>So correct me if I'm off base, but you're saying that drivers make
>requests in interrupt contexts even after your whole series and that's
>why you're using spinlocks instead of mutexes. ...but then in patch
>#3 in your series you say:
>
>> Switch over from using _irqsave/_irqrestore variants since we no longer
>> race with a lock from the interrupt handler.
>
>Those seem like contradictions. What happens if someone is holding
>the lock, then an interrupt fires, then the interrupt routine wants to
>do an async request. Boom, right?
>
The interrupt routine is handled by the driver and only completes the
waiting object (for sync requests). No other requests can be made from
our interrupt handler.
>> They cannot
>> use the sync variants. The async and sync variants are streamlined into
>> the same code path. Hence the use of spinlocks instead of mutexes
>> through the critical path.
>
>I will perhaps defer to Stephen who was the one thinking that a mutex
>would be a big win here. ...but if a mutex truly is a big win then it
>doesn't seem like it'd be that hard to have a linked list (protected
>by a spinlock) and then some type of async worker that:
>
>1. Grab the spinlock, pops one element off the linked list, release the spinlock
>2. Grab the mutex, send the one element, release the mutex
This would be a problem when the request is made from an irq handler. We
want to keep things simple and quick.
>3. Go back to step #1.
>
>This will keep the spinlock held for as little time as possible.
Quoting Lina Iyer (2019-07-29 12:01:39)
> On Thu, Jul 25 2019 at 09:44 -0600, Doug Anderson wrote:
> >On Thu, Jul 25, 2019 at 8:18 AM Lina Iyer <[email protected]> wrote:
> >>
> >> On Wed, Jul 24 2019 at 17:28 -0600, Doug Anderson wrote:
> >> >
> >> >Jumping in without reading all the context, but I saw this fly by and
> >> >it seemed odd. If I'm way off base then please ignore...
> >> >
> >> >Can you give more details? Why are these drivers in atomic contexts?
> >> >If they are in atomic contexts because they are running in the context
> >> >of an interrupt then your next patch in the series isn't so correct.
> >> >
> >> >Also: when people submit requests in atomic context are they always
> >> >submitting an asynchronous request? In that case we could
> >> >(presumably) just use a spinlock to protect the queue of async
> >> >requests and a mutex for everything else?
> >> Yes, drivers only make async requests in interrupt contexts.
> >
> >So correct me if I'm off base, but you're saying that drivers make
> >requests in interrupt contexts even after your whole series and that's
> >why you're using spinlocks instead of mutexes. ...but then in patch
> >#3 in your series you say:
> >
> >> Switch over from using _irqsave/_irqrestore variants since we no longer
> >> race with a lock from the interrupt handler.
> >
> >Those seem like contradictions. What happens if someone is holding
> >the lock, then an interrupt fires, then the interrupt routine wants to
> >do an async request. Boom, right?
> >
> The interrupt routine is handled by the driver and only completes the
> waiting object (for sync requests). No other requests can be made from
> our interrupt handler.
The question is more if an interrupt handler for some consumer driver
can call into this code and make an async request. Is that possible? If
so, the concern is that the driver's interrupt handler can run and try
to grab the lock on a CPU that already holds the lock in a non-irq
disabled context. This would lead to a deadlock while the CPU servicing
the interrupt waits for the lock held by another task that's been
interrupted.
>
> >> They cannot
> >> use the sync variants. The async and sync variants are streamlined into
> >> the same code path. Hence the use of spinlocks instead of mutexes
> >> through the critical path.
> >
> >I will perhaps defer to Stephen who was the one thinking that a mutex
> >would be a big win here. ...but if a mutex truly is a big win then it
> >doesn't seem like it'd be that hard to have a linked list (protected
> >by a spinlock) and then some type of async worker that:
> >
> >1. Grab the spinlock, pops one element off the linked list, release the spinlock
> >2. Grab the mutex, send the one element, release the mutex
> This would be a problem when the request is made from an irq handler. We
> want to keep things simple and quick.
>
Is the problem that you want to use RPMh code from deep within the idle
thread? As part of some sort of CPU idle driver for qcom platforms? The
way this discussion is going it sounds like nothing is standing in the
way of a design that uses a kthread to pump messages off a queue of
messages that is protected by a spinlock. The kthread would be woken up
by the sync or async write to continue to pump messages out until the
queue is empty.
On Mon, Jul 29 2019 at 14:56 -0600, Stephen Boyd wrote:
>Quoting Lina Iyer (2019-07-29 12:01:39)
>> On Thu, Jul 25 2019 at 09:44 -0600, Doug Anderson wrote:
>> >On Thu, Jul 25, 2019 at 8:18 AM Lina Iyer <[email protected]> wrote:
>> >>
>> >> On Wed, Jul 24 2019 at 17:28 -0600, Doug Anderson wrote:
>> >> >
>> >> >Jumping in without reading all the context, but I saw this fly by and
>> >> >it seemed odd. If I'm way off base then please ignore...
>> >> >
>> >> >Can you give more details? Why are these drivers in atomic contexts?
>> >> >If they are in atomic contexts because they are running in the context
>> >> >of an interrupt then your next patch in the series isn't so correct.
>> >> >
>> >> >Also: when people submit requests in atomic context are they always
>> >> >submitting an asynchronous request? In that case we could
>> >> >(presumably) just use a spinlock to protect the queue of async
>> >> >requests and a mutex for everything else?
>> >> Yes, drivers only make async requests in interrupt contexts.
>> >
>> >So correct me if I'm off base, but you're saying that drivers make
>> >requests in interrupt contexts even after your whole series and that's
>> >why you're using spinlocks instead of mutexes. ...but then in patch
>> >#3 in your series you say:
>> >
>> >> Switch over from using _irqsave/_irqrestore variants since we no longer
>> >> race with a lock from the interrupt handler.
>> >
>> >Those seem like contradictions. What happens if someone is holding
>> >the lock, then an interrupt fires, then the interrupt routine wants to
>> >do an async request. Boom, right?
>> >
>> The interrupt routine is handled by the driver and only completes the
>> waiting object (for sync requests). No other requests can be made from
>> our interrupt handler.
>
>The question is more if an interrupt handler for some consumer driver
>can call into this code and make an async request. Is that possible? If
>so, the concern is that the driver's interrupt handler can run and try
>to grab the lock on a CPU that already holds the lock in a non-irq
>disabled context. This would lead to a deadlock while the CPU servicing
>the interrupt waits for the lock held by another task that's been
>interrupted.
>
Hmm.. this patch will cause that issue, since we remove the irqsave
aspects of the locking. Let me give that a thought.
>>
>> >> They cannot
>> >> use the sync variants. The async and sync variants are streamlined into
>> >> the same code path. Hence the use of spinlocks instead of mutexes
>> >> through the critical path.
>> >
>> >I will perhaps defer to Stephen who was the one thinking that a mutex
>> >would be a big win here. ...but if a mutex truly is a big win then it
>> >doesn't seem like it'd be that hard to have a linked list (protected
>> >by a spinlock) and then some type of async worker that:
>> >
>> >1. Grab the spinlock, pops one element off the linked list, release the spinlock
>> >2. Grab the mutex, send the one element, release the mutex
>> This would be a problem when the request is made from an irq handler. We
>> want to keep things simple and quick.
>>
>
>Is the problem that you want to use RPMh code from deep within the idle
>thread? As part of some sort of CPU idle driver for qcom platforms? The
>way this discussion is going it sounds like nothing is standing in the
>way of a design that use a kthread to pump messages off a queue of
>messages that is protected by a spinlock. The kthread would be woken up
>by the sync or async write to continue to pump messages out until the
>queue is empty.
>
While it is true that we want to use RPMH in the cpuidle driver, it's just
that we had threads and all in our downstream 845 and it complicated the
whole setup a bit too much to our liking and did not help debug either.
I would rather not get all that back in the driver.
--Lina