If the hardware is still accessing memory after SMMU translation
is disabled (as part of smmu shutdown callback), then the
IOVAs (I/O virtual address) which it was using will go on the bus
as the physical addresses which will result in unknown crashes
like NoC/interconnect errors.
So, implement shutdown callback to i2c driver to stop on-going transfer
and unmap DMA mappings during system "reboot" or "shutdown".
Fixes: 37692de5d523 ("i2c: i2c-qcom-geni: Add bus driver for the Qualcomm GENI I2C controller")
Signed-off-by: Roja Rani Yarubandi <[email protected]>
---
Changes in V2:
- As per Stephen's comments added seperate function for stop transfer,
fixed minor nitpicks.
- As per Stephen's comments, changed commit text.
Changes in V3:
- As per Stephen's comments, squashed patch 1 into patch 2, added Fixes tag.
- As per Akash's comments, included FIFO case in stop_xfer, fixed minor nitpicks.
Changes in V4:
- As per Stephen's comments cleaned up geni_i2c_stop_xfer function,
added dma_buf in geni_i2c_dev struct to call i2c_put_dma_safe_msg_buf()
from other functions, removed "iova" check in geni_se_rx_dma_unprep()
and geni_se_tx_dma_unprep() functions.
- Added two helper functions geni_i2c_rx_one_msg_done() and
geni_i2c_tx_one_msg_done() to unwrap the things after rx/tx FIFO/DMA
transfers, so that the same can be used in geni_i2c_stop_xfer() function
during shutdown callback. Updated commit text accordingly.
- Checking whether it is tx/rx transfer using I2C_M_RD which is valid for both
FIFO and DMA cases, so dropped DMA_RX_ACTIVE and DMA_TX_ACTIVE bit checking
Changes in V5:
- As per Stephen's comments, added spin_lock_irqsave & spin_unlock_irqsave in
geni_i2c_stop_xfer() function.
Changes in V6:
- As per Stephen's comments, taken care of unsafe lock order in
geni_i2c_stop_xfer().
- Moved spin_lock/unlock to geni_i2c_rx_msg_cleanup() and
geni_i2c_tx_msg_cleanup() functions.
Changes in V7:
- No changes
Changes in V8:
- As per Wolfram Sang comment, removed goto and modified geni_i2c_stop_xfer()
accordingly.
Changes in V9:
- Fixed possbile race by protecting gi2c->cur and calling geni_i2c_abort_xfer()
with adding another parameter to differentiate from which sequence is the
geni_i2c_abort_xfer() called and handle the spin_lock/spin_unlock accordingly
inside geni_i2c_abort_xfer(). For this added two macros ABORT_XFER and
STOP_AND_ABORT_XFER.
- Added a bool variable "stop_xfer" in geni_i2c_dev struct, used to put stop
to upcoming geni_i2c_rx_one_msg() and geni_i2c_tx_one_msg() calls once we
recieve the shutdown call.
- Added gi2c->cur == NULL check in geni_i2c_irq() to not to process the irq
even if any transfer is queued and shutdown to HW received.
drivers/i2c/busses/i2c-qcom-geni.c | 71 +++++++++++++++++++++++++++---
1 file changed, 64 insertions(+), 7 deletions(-)
diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index 214b4c913a13..8ae17ccad99e 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -71,6 +71,8 @@ enum geni_i2c_err_code {
#define ABORT_TIMEOUT HZ
#define XFER_TIMEOUT HZ
#define RST_TIMEOUT HZ
+#define ABORT_XFER 0
+#define STOP_AND_ABORT_XFER 1
struct geni_i2c_dev {
struct geni_se se;
@@ -89,6 +91,7 @@ struct geni_i2c_dev {
void *dma_buf;
size_t xfer_len;
dma_addr_t dma_addr;
+ bool stop_xfer;
};
struct geni_i2c_err_log {
@@ -215,6 +218,11 @@ static irqreturn_t geni_i2c_irq(int irq, void *dev)
struct i2c_msg *cur;
spin_lock(&gi2c->lock);
+ if (!gi2c->cur) {
+ dev_err(gi2c->se.dev, "Can't process irq, gi2c->cur is NULL\n");
+ spin_unlock(&gi2c->lock);
+ return IRQ_HANDLED;
+ }
m_stat = readl_relaxed(base + SE_GENI_M_IRQ_STATUS);
rx_st = readl_relaxed(base + SE_GENI_RX_FIFO_STATUS);
dm_tx_st = readl_relaxed(base + SE_DMA_TX_IRQ_STAT);
@@ -222,8 +230,7 @@ static irqreturn_t geni_i2c_irq(int irq, void *dev)
dma = readl_relaxed(base + SE_GENI_DMA_MODE_EN);
cur = gi2c->cur;
- if (!cur ||
- m_stat & (M_CMD_FAILURE_EN | M_CMD_ABORT_EN) ||
+ if (m_stat & (M_CMD_FAILURE_EN | M_CMD_ABORT_EN) ||
dm_rx_st & (DM_I2C_CB_ERR)) {
if (m_stat & M_GP_IRQ_1_EN)
geni_i2c_err(gi2c, NACK);
@@ -301,17 +308,19 @@ static irqreturn_t geni_i2c_irq(int irq, void *dev)
return IRQ_HANDLED;
}
-static void geni_i2c_abort_xfer(struct geni_i2c_dev *gi2c)
+static void geni_i2c_abort_xfer(struct geni_i2c_dev *gi2c, bool is_stop_xfer)
{
u32 val;
unsigned long time_left = ABORT_TIMEOUT;
unsigned long flags;
- spin_lock_irqsave(&gi2c->lock, flags);
+ if (!is_stop_xfer)
+ spin_lock_irqsave(&gi2c->lock, flags);
geni_i2c_err(gi2c, GENI_TIMEOUT);
gi2c->cur = NULL;
geni_se_abort_m_cmd(&gi2c->se);
- spin_unlock_irqrestore(&gi2c->lock, flags);
+ if (!is_stop_xfer)
+ spin_unlock_irqrestore(&gi2c->lock, flags);
do {
time_left = wait_for_completion_timeout(&gi2c->done, time_left);
val = readl_relaxed(gi2c->se.base + SE_GENI_M_IRQ_STATUS);
@@ -375,6 +384,38 @@ static void geni_i2c_tx_msg_cleanup(struct geni_i2c_dev *gi2c,
}
}
+static void geni_i2c_stop_xfer(struct geni_i2c_dev *gi2c)
+{
+ int ret;
+ u32 geni_status;
+ struct i2c_msg *cur;
+ unsigned long flags;
+
+ /* Resume device, as runtime suspend can happen anytime during transfer */
+ ret = pm_runtime_get_sync(gi2c->se.dev);
+ if (ret < 0) {
+ dev_err(gi2c->se.dev, "Failed to resume device: %d\n", ret);
+ return;
+ }
+
+ spin_lock_irqsave(&gi2c->lock, flags);
+ gi2c->stop_xfer = 1;
+ geni_status = readl_relaxed(gi2c->se.base + SE_GENI_STATUS);
+ if (geni_status & M_GENI_CMD_ACTIVE) {
+ cur = gi2c->cur;
+ geni_i2c_abort_xfer(gi2c, STOP_AND_ABORT_XFER);
+ spin_unlock_irqrestore(&gi2c->lock, flags);
+ if (cur->flags & I2C_M_RD)
+ geni_i2c_rx_msg_cleanup(gi2c, cur);
+ else
+ geni_i2c_tx_msg_cleanup(gi2c, cur);
+ } else {
+ spin_unlock_irqrestore(&gi2c->lock, flags);
+ }
+
+ pm_runtime_put_sync_suspend(gi2c->se.dev);
+}
+
static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,
u32 m_param)
{
@@ -407,7 +448,7 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,
cur = gi2c->cur;
time_left = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT);
if (!time_left)
- geni_i2c_abort_xfer(gi2c);
+ geni_i2c_abort_xfer(gi2c, ABORT_XFER);
geni_i2c_rx_msg_cleanup(gi2c, cur);
@@ -449,7 +490,7 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,
cur = gi2c->cur;
time_left = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT);
if (!time_left)
- geni_i2c_abort_xfer(gi2c);
+ geni_i2c_abort_xfer(gi2c, ABORT_XFER);
geni_i2c_tx_msg_cleanup(gi2c, cur);
@@ -462,6 +503,7 @@ static int geni_i2c_xfer(struct i2c_adapter *adap,
{
struct geni_i2c_dev *gi2c = i2c_get_adapdata(adap);
int i, ret;
+ unsigned long flags;
gi2c->err = 0;
reinit_completion(&gi2c->done);
@@ -480,7 +522,13 @@ static int geni_i2c_xfer(struct i2c_adapter *adap,
m_param |= ((msgs[i].addr << SLV_ADDR_SHFT) & SLV_ADDR_MSK);
+ spin_lock_irqsave(&gi2c->lock, flags);
+ if (gi2c->stop_xfer) {
+ spin_unlock_irqrestore(&gi2c->lock, flags);
+ break;
+ }
gi2c->cur = &msgs[i];
+ spin_unlock_irqrestore(&gi2c->lock, flags);
if (msgs[i].flags & I2C_M_RD)
ret = geni_i2c_rx_one_msg(gi2c, &msgs[i], m_param);
else
@@ -624,6 +672,7 @@ static int geni_i2c_probe(struct platform_device *pdev)
dev_dbg(dev, "i2c fifo/se-dma mode. fifo depth:%d\n", tx_depth);
gi2c->suspended = 1;
+ gi2c->stop_xfer = 0;
pm_runtime_set_suspended(gi2c->se.dev);
pm_runtime_set_autosuspend_delay(gi2c->se.dev, I2C_AUTO_SUSPEND_DELAY);
pm_runtime_use_autosuspend(gi2c->se.dev);
@@ -650,6 +699,13 @@ static int geni_i2c_remove(struct platform_device *pdev)
return 0;
}
+static void geni_i2c_shutdown(struct platform_device *pdev)
+{
+ struct geni_i2c_dev *gi2c = platform_get_drvdata(pdev);
+
+ geni_i2c_stop_xfer(gi2c);
+}
+
static int __maybe_unused geni_i2c_runtime_suspend(struct device *dev)
{
int ret;
@@ -714,6 +770,7 @@ MODULE_DEVICE_TABLE(of, geni_i2c_dt_match);
static struct platform_driver geni_i2c_driver = {
.probe = geni_i2c_probe,
.remove = geni_i2c_remove,
+ .shutdown = geni_i2c_shutdown,
.driver = {
.name = "geni_i2c",
.pm = &geni_i2c_pm_ops,
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
Quoting Roja Rani Yarubandi (2021-04-20 04:13:55)
> diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
> index 214b4c913a13..8ae17ccad99e 100644
> --- a/drivers/i2c/busses/i2c-qcom-geni.c
> +++ b/drivers/i2c/busses/i2c-qcom-geni.c
> @@ -71,6 +71,8 @@ enum geni_i2c_err_code {
> #define ABORT_TIMEOUT HZ
> #define XFER_TIMEOUT HZ
> #define RST_TIMEOUT HZ
> +#define ABORT_XFER 0
> +#define STOP_AND_ABORT_XFER 1
These should be an enum.
>
> struct geni_i2c_dev {
> struct geni_se se;
> @@ -89,6 +91,7 @@ struct geni_i2c_dev {
> void *dma_buf;
> size_t xfer_len;
> dma_addr_t dma_addr;
> + bool stop_xfer;
> };
>
> struct geni_i2c_err_log {
> @@ -215,6 +218,11 @@ static irqreturn_t geni_i2c_irq(int irq, void *dev)
> struct i2c_msg *cur;
>
> spin_lock(&gi2c->lock);
> + if (!gi2c->cur) {
> + dev_err(gi2c->se.dev, "Can't process irq, gi2c->cur is NULL\n");
This error message is worthless. The user won't know what to do and then
we return IRQ_HANDLED? If the device is misbehaving we should return
IRQ_NONE and shut down the irq storm that will soon be upon us, not
print an error message and hope for the best.
> + spin_unlock(&gi2c->lock);
> + return IRQ_HANDLED;
> + }
> m_stat = readl_relaxed(base + SE_GENI_M_IRQ_STATUS);
> rx_st = readl_relaxed(base + SE_GENI_RX_FIFO_STATUS);
> dm_tx_st = readl_relaxed(base + SE_DMA_TX_IRQ_STAT);
> @@ -222,8 +230,7 @@ static irqreturn_t geni_i2c_irq(int irq, void *dev)
> dma = readl_relaxed(base + SE_GENI_DMA_MODE_EN);
> cur = gi2c->cur;
>
> - if (!cur ||
> - m_stat & (M_CMD_FAILURE_EN | M_CMD_ABORT_EN) ||
> + if (m_stat & (M_CMD_FAILURE_EN | M_CMD_ABORT_EN) ||
> dm_rx_st & (DM_I2C_CB_ERR)) {
> if (m_stat & M_GP_IRQ_1_EN)
> geni_i2c_err(gi2c, NACK);
> @@ -301,17 +308,19 @@ static irqreturn_t geni_i2c_irq(int irq, void *dev)
> return IRQ_HANDLED;
> }
>
> -static void geni_i2c_abort_xfer(struct geni_i2c_dev *gi2c)
> +static void geni_i2c_abort_xfer(struct geni_i2c_dev *gi2c, bool is_stop_xfer)
The bool should be an enum, but a better approach would be to have a
locked and unlocked version of this function.
> {
> u32 val;
> unsigned long time_left = ABORT_TIMEOUT;
> unsigned long flags;
>
> - spin_lock_irqsave(&gi2c->lock, flags);
> + if (!is_stop_xfer)
> + spin_lock_irqsave(&gi2c->lock, flags);
> geni_i2c_err(gi2c, GENI_TIMEOUT);
> gi2c->cur = NULL;
> geni_se_abort_m_cmd(&gi2c->se);
> - spin_unlock_irqrestore(&gi2c->lock, flags);
> + if (!is_stop_xfer)
> + spin_unlock_irqrestore(&gi2c->lock, flags);
Please no conditional locking. It's too hard to reason about.
> do {
> time_left = wait_for_completion_timeout(&gi2c->done, time_left);
> val = readl_relaxed(gi2c->se.base + SE_GENI_M_IRQ_STATUS);
> @@ -375,6 +384,38 @@ static void geni_i2c_tx_msg_cleanup(struct geni_i2c_dev *gi2c,
> }
> }
>
> +static void geni_i2c_stop_xfer(struct geni_i2c_dev *gi2c)
> +{
> + int ret;
> + u32 geni_status;
> + struct i2c_msg *cur;
> + unsigned long flags;
> +
> + /* Resume device, as runtime suspend can happen anytime during transfer */
This comment doesn't make any sense. Hopefully a suspend can't happen
during a transfer, but only before or after a transfer. Otherwise, the
transfer code is broken and isn't properly keeping the device runtime
resumed during the transfer.
> + ret = pm_runtime_get_sync(gi2c->se.dev);
> + if (ret < 0) {
> + dev_err(gi2c->se.dev, "Failed to resume device: %d\n", ret);
> + return;
> + }
> +
> + spin_lock_irqsave(&gi2c->lock, flags);
> + gi2c->stop_xfer = 1;
> + geni_status = readl_relaxed(gi2c->se.base + SE_GENI_STATUS);
> + if (geni_status & M_GENI_CMD_ACTIVE) {
> + cur = gi2c->cur;
> + geni_i2c_abort_xfer(gi2c, STOP_AND_ABORT_XFER);
> + spin_unlock_irqrestore(&gi2c->lock, flags);
> + if (cur->flags & I2C_M_RD)
> + geni_i2c_rx_msg_cleanup(gi2c, cur);
> + else
> + geni_i2c_tx_msg_cleanup(gi2c, cur);
> + } else {
> + spin_unlock_irqrestore(&gi2c->lock, flags);
> + }
Please unlock outside of an if condition. A local variable can be used
outside of the unlock, but then the code is easier to follow
spin_lock_irqsave(&gi2c->lock, flags);
if (geni_status & M_GENI_CMD_ACTIVE) {
cur = gic2->cur;
geni_i2c_abort_xfer(....);
}
spin_unlock_irqrestore(gi2c->lock, flags);
if (cur) {
if (cur->flags & I2C_M_RD)
...
else
...
}
And then I don't really know if grabbing 'cur' out of the struct and
then messing with it outside the lock is correct.
> +
> + pm_runtime_put_sync_suspend(gi2c->se.dev);
> +}
> +
> static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,
> u32 m_param)
> {
> @@ -407,7 +448,7 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,
> cur = gi2c->cur;
> time_left = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT);
> if (!time_left)
> - geni_i2c_abort_xfer(gi2c);
> + geni_i2c_abort_xfer(gi2c, ABORT_XFER);
So this would say geni_i2c_abort_xfer() but the one above would say
geni_i2c_abort_xfer_locked() because the lock is already held.
>
> geni_i2c_rx_msg_cleanup(gi2c, cur);
>
> @@ -449,7 +490,7 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,
> cur = gi2c->cur;
> time_left = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT);
> if (!time_left)
> - geni_i2c_abort_xfer(gi2c);
> + geni_i2c_abort_xfer(gi2c, ABORT_XFER);
>
> geni_i2c_tx_msg_cleanup(gi2c, cur);
>
> @@ -462,6 +503,7 @@ static int geni_i2c_xfer(struct i2c_adapter *adap,
> {
> struct geni_i2c_dev *gi2c = i2c_get_adapdata(adap);
> int i, ret;
> + unsigned long flags;
>
> gi2c->err = 0;
> reinit_completion(&gi2c->done);
> @@ -480,7 +522,13 @@ static int geni_i2c_xfer(struct i2c_adapter *adap,
>
> m_param |= ((msgs[i].addr << SLV_ADDR_SHFT) & SLV_ADDR_MSK);
>
> + spin_lock_irqsave(&gi2c->lock, flags);
> + if (gi2c->stop_xfer) {
> + spin_unlock_irqrestore(&gi2c->lock, flags);
> + break;
> + }
> gi2c->cur = &msgs[i];
> + spin_unlock_irqrestore(&gi2c->lock, flags);
Is this to jump into the transfer real fast and break out if we're in
the middle of a transfer?
> if (msgs[i].flags & I2C_M_RD)
> ret = geni_i2c_rx_one_msg(gi2c, &msgs[i], m_param);
> else
> @@ -624,6 +672,7 @@ static int geni_i2c_probe(struct platform_device *pdev)
> dev_dbg(dev, "i2c fifo/se-dma mode. fifo depth:%d\n", tx_depth);
>
> gi2c->suspended = 1;
> + gi2c->stop_xfer = 0;
> pm_runtime_set_suspended(gi2c->se.dev);
> pm_runtime_set_autosuspend_delay(gi2c->se.dev, I2C_AUTO_SUSPEND_DELAY);
> pm_runtime_use_autosuspend(gi2c->se.dev);
> @@ -650,6 +699,13 @@ static int geni_i2c_remove(struct platform_device *pdev)
> return 0;
> }
>
> +static void geni_i2c_shutdown(struct platform_device *pdev)
> +{
> + struct geni_i2c_dev *gi2c = platform_get_drvdata(pdev);
> +
> + geni_i2c_stop_xfer(gi2c);
It would read better as
geni_i2c_plug_xfer(gi2c);
or
geni_i2c_flush_and_teardown(gi2c);
or
geni_i2c_teardown_tx_rx(gi2c);
Something that says we're waiting for any transfer to complete, and then
plugging the queue and removing the i2c bus entirely.
In fact, where is that code? I'd expect to see i2c_del_adapter() in here
so we know the adapter can't accept transfers anymore. Maybe
i2c_del_adapter() could be called, and then there's nothing to do after
that? This whole patch is trying to rip the adapter out from under the
i2c core framework, when we should take the opposite approach and remove
it from the core framework so that it can't transfer anything anymore
and thus the IOMMU can remove the mapping.
> +}
> +
> static int __maybe_unused geni_i2c_runtime_suspend(struct device *dev)
> {
> int ret;
Hi Stephen,
On 2021-05-05 07:08, Stephen Boyd wrote:
> Quoting Roja Rani Yarubandi (2021-04-20 04:13:55)
>> diff --git a/drivers/i2c/busses/i2c-qcom-geni.c
>> b/drivers/i2c/busses/i2c-qcom-geni.c
>> index 214b4c913a13..8ae17ccad99e 100644
>> --- a/drivers/i2c/busses/i2c-qcom-geni.c
>> +++ b/drivers/i2c/busses/i2c-qcom-geni.c
>> @@ -71,6 +71,8 @@ enum geni_i2c_err_code {
>> #define ABORT_TIMEOUT HZ
>> #define XFER_TIMEOUT HZ
>> #define RST_TIMEOUT HZ
>> +#define ABORT_XFER 0
>> +#define STOP_AND_ABORT_XFER 1
>
> These should be an enum.
>
Okay.
>>
>> struct geni_i2c_dev {
>> struct geni_se se;
>> @@ -89,6 +91,7 @@ struct geni_i2c_dev {
>> void *dma_buf;
>> size_t xfer_len;
>> dma_addr_t dma_addr;
>> + bool stop_xfer;
>> };
>>
>> struct geni_i2c_err_log {
>> @@ -215,6 +218,11 @@ static irqreturn_t geni_i2c_irq(int irq, void
>> *dev)
>> struct i2c_msg *cur;
>>
>> spin_lock(&gi2c->lock);
>> + if (!gi2c->cur) {
>> + dev_err(gi2c->se.dev, "Can't process irq, gi2c->cur is
>> NULL\n");
>
> This error message is worthless. The user won't know what to do and
> then
> we return IRQ_HANDLED? If the device is misbehaving we should return
> IRQ_NONE and shut down the irq storm that will soon be upon us, not
> print an error message and hope for the best.
>
Okay, will return IRQ_NONE. The reason I have kept this error message is
to know
that this case has occurred.
>> + spin_unlock(&gi2c->lock);
>> + return IRQ_HANDLED;
>> + }
>> m_stat = readl_relaxed(base + SE_GENI_M_IRQ_STATUS);
>> rx_st = readl_relaxed(base + SE_GENI_RX_FIFO_STATUS);
>> dm_tx_st = readl_relaxed(base + SE_DMA_TX_IRQ_STAT);
>> @@ -222,8 +230,7 @@ static irqreturn_t geni_i2c_irq(int irq, void
>> *dev)
>> dma = readl_relaxed(base + SE_GENI_DMA_MODE_EN);
>> cur = gi2c->cur;
>>
>> - if (!cur ||
>> - m_stat & (M_CMD_FAILURE_EN | M_CMD_ABORT_EN) ||
>> + if (m_stat & (M_CMD_FAILURE_EN | M_CMD_ABORT_EN) ||
>> dm_rx_st & (DM_I2C_CB_ERR)) {
>> if (m_stat & M_GP_IRQ_1_EN)
>> geni_i2c_err(gi2c, NACK);
>> @@ -301,17 +308,19 @@ static irqreturn_t geni_i2c_irq(int irq, void
>> *dev)
>> return IRQ_HANDLED;
>> }
>>
>> -static void geni_i2c_abort_xfer(struct geni_i2c_dev *gi2c)
>> +static void geni_i2c_abort_xfer(struct geni_i2c_dev *gi2c, bool
>> is_stop_xfer)
>
> The bool should be an enum, but a better approach would be to have a
> locked and unlocked version of this function.
>
Okay, will try to come up with the better approach.
>> {
>> u32 val;
>> unsigned long time_left = ABORT_TIMEOUT;
>> unsigned long flags;
>>
>> - spin_lock_irqsave(&gi2c->lock, flags);
>> + if (!is_stop_xfer)
>> + spin_lock_irqsave(&gi2c->lock, flags);
>> geni_i2c_err(gi2c, GENI_TIMEOUT);
>> gi2c->cur = NULL;
>> geni_se_abort_m_cmd(&gi2c->se);
>> - spin_unlock_irqrestore(&gi2c->lock, flags);
>> + if (!is_stop_xfer)
>> + spin_unlock_irqrestore(&gi2c->lock, flags);
>
> Please no conditional locking. It's too hard to reason about.
>
This will be fixed with the above mentioned optimal approach.
>> do {
>> time_left = wait_for_completion_timeout(&gi2c->done,
>> time_left);
>> val = readl_relaxed(gi2c->se.base +
>> SE_GENI_M_IRQ_STATUS);
>> @@ -375,6 +384,38 @@ static void geni_i2c_tx_msg_cleanup(struct
>> geni_i2c_dev *gi2c,
>> }
>> }
>>
>> +static void geni_i2c_stop_xfer(struct geni_i2c_dev *gi2c)
>> +{
>> + int ret;
>> + u32 geni_status;
>> + struct i2c_msg *cur;
>> + unsigned long flags;
>> +
>> + /* Resume device, as runtime suspend can happen anytime during
>> transfer */
>
> This comment doesn't make any sense. Hopefully a suspend can't happen
> during a transfer, but only before or after a transfer. Otherwise, the
> transfer code is broken and isn't properly keeping the device runtime
> resumed during the transfer.
>
Okay, instead of resuming the device, I will check for the device
status,
and if it is in resume state will proceed accordingly.
>> + ret = pm_runtime_get_sync(gi2c->se.dev);
>> + if (ret < 0) {
>> + dev_err(gi2c->se.dev, "Failed to resume device: %d\n",
>> ret);
>> + return;
>> + }
>> +
>> + spin_lock_irqsave(&gi2c->lock, flags);
>> + gi2c->stop_xfer = 1;
>> + geni_status = readl_relaxed(gi2c->se.base + SE_GENI_STATUS);
>> + if (geni_status & M_GENI_CMD_ACTIVE) {
>> + cur = gi2c->cur;
>> + geni_i2c_abort_xfer(gi2c, STOP_AND_ABORT_XFER);
>> + spin_unlock_irqrestore(&gi2c->lock, flags);
>> + if (cur->flags & I2C_M_RD)
>> + geni_i2c_rx_msg_cleanup(gi2c, cur);
>> + else
>> + geni_i2c_tx_msg_cleanup(gi2c, cur);
>> + } else {
>> + spin_unlock_irqrestore(&gi2c->lock, flags);
>> + }
>
> Please unlock outside of an if condition. A local variable can be used
> outside of the unlock, but then the code is easier to follow
>
Okay.
> spin_lock_irqsave(&gi2c->lock, flags);
> if (geni_status & M_GENI_CMD_ACTIVE) {
> cur = gic2->cur;
> geni_i2c_abort_xfer(....);
> }
> spin_unlock_irqrestore(gi2c->lock, flags);
>
> if (cur) {
> if (cur->flags & I2C_M_RD)
> ...
> else
> ...
> }
>
> And then I don't really know if grabbing 'cur' out of the struct and
> then messing with it outside the lock is correct.
>
I think it shouldn't be a problem, gi2c->cur will be NULL, and 'cur'
should be
fine to use, and we have fixed the race between this and geni_i2c_xfer()
with proper protection.
>> +
>> + pm_runtime_put_sync_suspend(gi2c->se.dev);
>> +}
>> +
>> static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct
>> i2c_msg *msg,
>> u32 m_param)
>> {
>> @@ -407,7 +448,7 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev
>> *gi2c, struct i2c_msg *msg,
>> cur = gi2c->cur;
>> time_left = wait_for_completion_timeout(&gi2c->done,
>> XFER_TIMEOUT);
>> if (!time_left)
>> - geni_i2c_abort_xfer(gi2c);
>> + geni_i2c_abort_xfer(gi2c, ABORT_XFER);
>
> So this would say geni_i2c_abort_xfer() but the one above would say
> geni_i2c_abort_xfer_locked() because the lock is already held.
>
Correct.
>>
>> geni_i2c_rx_msg_cleanup(gi2c, cur);
>>
>> @@ -449,7 +490,7 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev
>> *gi2c, struct i2c_msg *msg,
>> cur = gi2c->cur;
>> time_left = wait_for_completion_timeout(&gi2c->done,
>> XFER_TIMEOUT);
>> if (!time_left)
>> - geni_i2c_abort_xfer(gi2c);
>> + geni_i2c_abort_xfer(gi2c, ABORT_XFER);
>>
>> geni_i2c_tx_msg_cleanup(gi2c, cur);
>>
>> @@ -462,6 +503,7 @@ static int geni_i2c_xfer(struct i2c_adapter *adap,
>> {
>> struct geni_i2c_dev *gi2c = i2c_get_adapdata(adap);
>> int i, ret;
>> + unsigned long flags;
>>
>> gi2c->err = 0;
>> reinit_completion(&gi2c->done);
>> @@ -480,7 +522,13 @@ static int geni_i2c_xfer(struct i2c_adapter
>> *adap,
>>
>> m_param |= ((msgs[i].addr << SLV_ADDR_SHFT) &
>> SLV_ADDR_MSK);
>>
>> + spin_lock_irqsave(&gi2c->lock, flags);
>> + if (gi2c->stop_xfer) {
>> + spin_unlock_irqrestore(&gi2c->lock, flags);
>> + break;
>> + }
>> gi2c->cur = &msgs[i];
>> + spin_unlock_irqrestore(&gi2c->lock, flags);
>
> Is this to jump into the transfer real fast and break out if we're in
> the middle of a transfer?
>
Yes, to break the loop.
>> if (msgs[i].flags & I2C_M_RD)
>> ret = geni_i2c_rx_one_msg(gi2c, &msgs[i],
>> m_param);
>> else
>> @@ -624,6 +672,7 @@ static int geni_i2c_probe(struct platform_device
>> *pdev)
>> dev_dbg(dev, "i2c fifo/se-dma mode. fifo depth:%d\n",
>> tx_depth);
>>
>> gi2c->suspended = 1;
>> + gi2c->stop_xfer = 0;
>> pm_runtime_set_suspended(gi2c->se.dev);
>> pm_runtime_set_autosuspend_delay(gi2c->se.dev,
>> I2C_AUTO_SUSPEND_DELAY);
>> pm_runtime_use_autosuspend(gi2c->se.dev);
>> @@ -650,6 +699,13 @@ static int geni_i2c_remove(struct platform_device
>> *pdev)
>> return 0;
>> }
>>
>> +static void geni_i2c_shutdown(struct platform_device *pdev)
>> +{
>> + struct geni_i2c_dev *gi2c = platform_get_drvdata(pdev);
>> +
>> + geni_i2c_stop_xfer(gi2c);
>
> It would read better as
>
> geni_i2c_plug_xfer(gi2c);
>
> or
>
> geni_i2c_flush_and_teardown(gi2c);
>
> or
>
> geni_i2c_teardown_tx_rx(gi2c);
>
> Something that says we're waiting for any transfer to complete, and
> then
> plugging the queue and removing the i2c bus entirely.
>
Sure, will change.
> In fact, where is that code? I'd expect to see i2c_del_adapter() in
> here
> so we know the adapter can't accept transfers anymore. Maybe
> i2c_del_adapter() could be called, and then there's nothing to do after
> that? This whole patch is trying to rip the adapter out from under the
> i2c core framework, when we should take the opposite approach and
> remove
> it from the core framework so that it can't transfer anything anymore
> and thus the IOMMU can remove the mapping.
>
IIUC about probe/remove/shutdown calls, during "remove" we will unplug
the
device with opposite calls to "probe's" plug operations.
For example i2c_add_adapter() from 'probe' and i2c_del_adapter() from
'remove'.
For "shutdown", as system is going to shutdown, there is no need of
unplug
operations to be done.
And also, I had a glance on other upstream i2c drivers, and noticed
"i2c-i801.c"
driver has i2c_del_adapter() called from remove callback but not from
shutdown
callback.
And actually I tried calling i2c_del_adapter() from geni_i2c_shutdown(),
and it resulted in below WARNING trace
[ 90.320282] Call trace:
[ 90.322807] _regulator_put+0xc4/0xcc
[ 90.326583] regulator_bulk_free+0x48/0x6c
[ 90.330808] devm_regulator_bulk_release+0x20/0x2c
[ 90.335744] release_nodes+0x1d0/0x244
[ 90.339609] devres_release_all+0x3c/0x54
[ 90.343735] device_release_driver_internal+0x108/0x194
[ 90.349109] device_release_driver+0x24/0x30
[ 90.353510] bus_remove_device+0xd0/0xf4
[ 90.357548] device_del+0x1a8/0x2f8
[ 90.361143] device_unregister+0x1c/0x34
[ 90.365181] __unregister_client+0x78/0x88
[ 90.369397] device_for_each_child+0x64/0xb4
[ 90.373797] i2c_del_adapter+0xf0/0x1d4
[ 90.377745] geni_i2c_shutdown+0x9c/0xc0
[ 90.381783] platform_drv_shutdown+0x28/0x34
[ 90.386182] device_shutdown+0x148/0x1f0
Can you please suggest me what might be missing here?
>> +}
>> +
>> static int __maybe_unused geni_i2c_runtime_suspend(struct device
>> *dev)
>> {
>> int ret;
Thanks,
Roja
Quoting [email protected] (2021-05-07 03:07:42)
> On 2021-05-05 07:08, Stephen Boyd wrote:
> > Quoting Roja Rani Yarubandi (2021-04-20 04:13:55)
>
> > In fact, where is that code? I'd expect to see i2c_del_adapter() in
> > here
> > so we know the adapter can't accept transfers anymore. Maybe
> > i2c_del_adapter() could be called, and then there's nothing to do after
> > that? This whole patch is trying to rip the adapter out from under the
> > i2c core framework, when we should take the opposite approach and
> > remove
> > it from the core framework so that it can't transfer anything anymore
> > and thus the IOMMU can remove the mapping.
> >
>
> IIUC about probe/remove/shutdown calls, during "remove" we will unplug
> the
> device with opposite calls to "probe's" plug operations.
> For example i2c_add_adapter() from 'probe' and i2c_del_adapter() from
> 'remove'.
> For "shutdown", as system is going to shutdown, there is no need of
> unplug
> operations to be done.
>
> And also, I had a glance on other upstream i2c drivers, and noticed
> "i2c-i801.c"
> driver has i2c_del_adapter() called from remove callback but not from
> shutdown
> callback.
Sure, other drivers could also be broken.
>
> And actually I tried calling i2c_del_adapter() from geni_i2c_shutdown(),
> and it resulted in below WARNING trace
> [ 90.320282] Call trace:
> [ 90.322807] _regulator_put+0xc4/0xcc
> [ 90.326583] regulator_bulk_free+0x48/0x6c
> [ 90.330808] devm_regulator_bulk_release+0x20/0x2c
> [ 90.335744] release_nodes+0x1d0/0x244
> [ 90.339609] devres_release_all+0x3c/0x54
> [ 90.343735] device_release_driver_internal+0x108/0x194
> [ 90.349109] device_release_driver+0x24/0x30
> [ 90.353510] bus_remove_device+0xd0/0xf4
> [ 90.357548] device_del+0x1a8/0x2f8
> [ 90.361143] device_unregister+0x1c/0x34
> [ 90.365181] __unregister_client+0x78/0x88
> [ 90.369397] device_for_each_child+0x64/0xb4
> [ 90.373797] i2c_del_adapter+0xf0/0x1d4
> [ 90.377745] geni_i2c_shutdown+0x9c/0xc0
> [ 90.381783] platform_drv_shutdown+0x28/0x34
> [ 90.386182] device_shutdown+0x148/0x1f0
>
> Can you please suggest me what might be missing here?
>
It looks like some device that is on the i2c bus is putting a regulator
in the remove path without disabling it. Can you print out which device
driver it is and fix that driver to call regulator_disable() on the
driver remove path? I'll try locally and see if I can find the driver
too.
Quoting Stephen Boyd (2021-05-07 13:09:21)
> Quoting [email protected] (2021-05-07 03:07:42)
> > On 2021-05-05 07:08, Stephen Boyd wrote:
> > > Quoting Roja Rani Yarubandi (2021-04-20 04:13:55)
> >
> > > In fact, where is that code? I'd expect to see i2c_del_adapter() in
> > > here
> > > so we know the adapter can't accept transfers anymore. Maybe
> > > i2c_del_adapter() could be called, and then there's nothing to do after
> > > that? This whole patch is trying to rip the adapter out from under the
> > > i2c core framework, when we should take the opposite approach and
> > > remove
> > > it from the core framework so that it can't transfer anything anymore
> > > and thus the IOMMU can remove the mapping.
> > >
> >
> > IIUC about probe/remove/shutdown calls, during "remove" we will unplug
> > the
> > device with opposite calls to "probe's" plug operations.
> > For example i2c_add_adapter() from 'probe' and i2c_del_adapter() from
> > 'remove'.
> > For "shutdown", as system is going to shutdown, there is no need of
> > unplug
> > operations to be done.
> >
> > And also, I had a glance on other upstream i2c drivers, and noticed
> > "i2c-i801.c"
> > driver has i2c_del_adapter() called from remove callback but not from
> > shutdown
> > callback.
>
> Sure, other drivers could also be broken.
What does it have in the shutdown callback? I see that it is wrong to
delete the adapter in shutdown because this problem happens. First
shutdown is called for various i2c clients, then shutdown is called for
the adapter. If the adapter shutdown calls i2c_del_adapter(), then
remove is called for the various i2c clients. The i2c clients aren't
expecting this and start doing double frees and stuff. It's really quite
a mess. I suspect i2c shutdown should probably block remove from being
called on it entirely. Either way, it's the wrong approach.
Instead, I think we should merely suspend the i2c bus like this. Then we
can hunt down the various drivers that try to access the bus after the
i2c bus has been removed. I've already done that for rt5682 (see the
patch link later).
----8<---
diff --git a/drivers/i2c/busses/i2c-qcom-geni.c
b/drivers/i2c/busses/i2c-qcom-geni.c
index 20216e382b4c..af3ed808ba2e 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -655,6 +655,14 @@ static int geni_i2c_remove(struct platform_device *pdev)
return 0;
}
+static void geni_i2c_shutdown(struct platform_device *pdev)
+{
+ struct geni_i2c_dev *gi2c = platform_get_drvdata(pdev);
+
+ /* Make client i2c transfers start failing */
+ i2c_mark_adapter_suspended(&gi2c->adap);
+}
+
static int __maybe_unused geni_i2c_runtime_suspend(struct device *dev)
{
int ret;
@@ -719,6 +727,7 @@ MODULE_DEVICE_TABLE(of, geni_i2c_dt_match);
static struct platform_driver geni_i2c_driver = {
.probe = geni_i2c_probe,
.remove = geni_i2c_remove,
+ .shutdown = geni_i2c_shutdown,
.driver = {
.name = "geni_i2c",
.pm = &geni_i2c_pm_ops,
>
> >
> > And actually I tried calling i2c_del_adapter() from geni_i2c_shutdown(),
> > and it resulted in below WARNING trace
> > [ 90.320282] Call trace:
> > [ 90.322807] _regulator_put+0xc4/0xcc
> > [ 90.326583] regulator_bulk_free+0x48/0x6c
> > [ 90.330808] devm_regulator_bulk_release+0x20/0x2c
> > [ 90.335744] release_nodes+0x1d0/0x244
> > [ 90.339609] devres_release_all+0x3c/0x54
> > [ 90.343735] device_release_driver_internal+0x108/0x194
> > [ 90.349109] device_release_driver+0x24/0x30
> > [ 90.353510] bus_remove_device+0xd0/0xf4
> > [ 90.357548] device_del+0x1a8/0x2f8
> > [ 90.361143] device_unregister+0x1c/0x34
> > [ 90.365181] __unregister_client+0x78/0x88
> > [ 90.369397] device_for_each_child+0x64/0xb4
> > [ 90.373797] i2c_del_adapter+0xf0/0x1d4
> > [ 90.377745] geni_i2c_shutdown+0x9c/0xc0
> > [ 90.381783] platform_drv_shutdown+0x28/0x34
> > [ 90.386182] device_shutdown+0x148/0x1f0
> >
> > Can you please suggest me what might be missing here?
> >
>
> It looks like some device that is on the i2c bus is putting a regulator
> in the remove path without disabling it. Can you print out which device
> driver it is and fix that driver to call regulator_disable() on the
> driver remove path? I'll try locally and see if I can find the driver
> too.
I see that it's the rt5682 driver. I sent
https://lore.kernel.org/r/[email protected]
for this in case you want to look, but it won't be necessary.
On 2021-05-08 13:26, Stephen Boyd wrote:
> Quoting Stephen Boyd (2021-05-07 13:09:21)
>> Quoting [email protected] (2021-05-07 03:07:42)
>> > On 2021-05-05 07:08, Stephen Boyd wrote:
>> > > Quoting Roja Rani Yarubandi (2021-04-20 04:13:55)
>> >
>> > > In fact, where is that code? I'd expect to see i2c_del_adapter() in
>> > > here
>> > > so we know the adapter can't accept transfers anymore. Maybe
>> > > i2c_del_adapter() could be called, and then there's nothing to do after
>> > > that? This whole patch is trying to rip the adapter out from under the
>> > > i2c core framework, when we should take the opposite approach and
>> > > remove
>> > > it from the core framework so that it can't transfer anything anymore
>> > > and thus the IOMMU can remove the mapping.
>> > >
>> >
>> > IIUC about probe/remove/shutdown calls, during "remove" we will unplug
>> > the
>> > device with opposite calls to "probe's" plug operations.
>> > For example i2c_add_adapter() from 'probe' and i2c_del_adapter() from
>> > 'remove'.
>> > For "shutdown", as system is going to shutdown, there is no need of
>> > unplug
>> > operations to be done.
>> >
>> > And also, I had a glance on other upstream i2c drivers, and noticed
>> > "i2c-i801.c"
>> > driver has i2c_del_adapter() called from remove callback but not from
>> > shutdown
>> > callback.
>>
>> Sure, other drivers could also be broken.
>
> What does it have in the shutdown callback? I see that it is wrong to
> delete the adapter in shutdown because this problem happens. First
> shutdown is called for various i2c clients, then shutdown is called for
> the adapter. If the adapter shutdown calls i2c_del_adapter(), then
> remove is called for the various i2c clients. The i2c clients aren't
> expecting this and start doing double frees and stuff. It's really
> quite
> a mess. I suspect i2c shutdown should probably block remove from being
> called on it entirely. Either way, it's the wrong approach.
>
> Instead, I think we should merely suspend the i2c bus like this. Then
> we
> can hunt down the various drivers that try to access the bus after the
> i2c bus has been removed. I've already done that for rt5682 (see the
> patch link later).
>
Ok. I will proceed with the current approach only then
(not calling i2c_del_adapter() in shutdown). I will post the
patch with the other comments answered.
> ----8<---
> diff --git a/drivers/i2c/busses/i2c-qcom-geni.c
> b/drivers/i2c/busses/i2c-qcom-geni.c
> index 20216e382b4c..af3ed808ba2e 100644
> --- a/drivers/i2c/busses/i2c-qcom-geni.c
> +++ b/drivers/i2c/busses/i2c-qcom-geni.c
> @@ -655,6 +655,14 @@ static int geni_i2c_remove(struct platform_device
> *pdev)
> return 0;
> }
>
> +static void geni_i2c_shutdown(struct platform_device *pdev)
> +{
> + struct geni_i2c_dev *gi2c = platform_get_drvdata(pdev);
> +
> + /* Make client i2c transfers start failing */
> + i2c_mark_adapter_suspended(&gi2c->adap);
> +}
> +
> static int __maybe_unused geni_i2c_runtime_suspend(struct device *dev)
> {
> int ret;
> @@ -719,6 +727,7 @@ MODULE_DEVICE_TABLE(of, geni_i2c_dt_match);
> static struct platform_driver geni_i2c_driver = {
> .probe = geni_i2c_probe,
> .remove = geni_i2c_remove,
> + .shutdown = geni_i2c_shutdown,
> .driver = {
> .name = "geni_i2c",
> .pm = &geni_i2c_pm_ops,
>
>>
>> >
>> > And actually I tried calling i2c_del_adapter() from geni_i2c_shutdown(),
>> > and it resulted in below WARNING trace
>> > [ 90.320282] Call trace:
>> > [ 90.322807] _regulator_put+0xc4/0xcc
>> > [ 90.326583] regulator_bulk_free+0x48/0x6c
>> > [ 90.330808] devm_regulator_bulk_release+0x20/0x2c
>> > [ 90.335744] release_nodes+0x1d0/0x244
>> > [ 90.339609] devres_release_all+0x3c/0x54
>> > [ 90.343735] device_release_driver_internal+0x108/0x194
>> > [ 90.349109] device_release_driver+0x24/0x30
>> > [ 90.353510] bus_remove_device+0xd0/0xf4
>> > [ 90.357548] device_del+0x1a8/0x2f8
>> > [ 90.361143] device_unregister+0x1c/0x34
>> > [ 90.365181] __unregister_client+0x78/0x88
>> > [ 90.369397] device_for_each_child+0x64/0xb4
>> > [ 90.373797] i2c_del_adapter+0xf0/0x1d4
>> > [ 90.377745] geni_i2c_shutdown+0x9c/0xc0
>> > [ 90.381783] platform_drv_shutdown+0x28/0x34
>> > [ 90.386182] device_shutdown+0x148/0x1f0
>> >
>> > Can you please suggest me what might be missing here?
>> >
>>
>> It looks like some device that is on the i2c bus is putting a
>> regulator
>> in the remove path without disabling it. Can you print out which
>> device
>> driver it is and fix that driver to call regulator_disable() on the
>> driver remove path? I'll try locally and see if I can find the driver
>> too.
>
> I see that it's the rt5682 driver. I sent
> https://lore.kernel.org/r/[email protected]
> for this in case you want to look, but it won't be necessary.