2021-05-19 17:40:43

by Patrice CHOTARD

[permalink] [raw]
Subject: [PATCH v3 0/3] MTD: spinand: Add spi_mem_poll_status() support

From: Patrice Chotard <[email protected]>

This series adds support for the spi_mem_poll_status() spinand
interface.
Some QSPI controllers can automatically poll the memory status
during operations (erase, read or write), which offloads this
task from the CPU.
The STM32 QSPI supports this feature; the driver updates are also
part of this series.

Changes in v3:
- Add spi_mem_read_status() which allows to read 8 or 16 bits status.
- Add initial_delay_us and polling_delay_us parameters to spi_mem_poll_status()
and also to the poll_status() callback.
- Move spi_mem_supports_op() in SW-based polling case.
- Add delay before invoking read_poll_timeout().
- Remove the reinit/wait_for_completion() added in v2.
- Add initial_delay_us and polling_delay_us parameters to spinand_wait().
- Add SPINAND_READ/WRITE/ERASE/RESET_INITIAL_DELAY_US and
SPINAND_READ/WRITE/ERASE/RESET_POLL_DELAY_US defines.
- Remove spi_mem_finalize_op() API added in v2.

Changes in v2:
- Indicates the spi_mem_poll_status() timeout unit
- Use 2-byte wide status register
- Add spi_mem_supports_op() call in spi_mem_poll_status()
- Add completion management in spi_mem_poll_status()
- Add offload/non-offload case management in spi_mem_poll_status()
- Optimize the non-offload case by using read_poll_timeout()
- mask and match stm32_qspi_poll_status()'s parameters are 2-byte wide
- Make usage of new spi_mem_finalize_op() API in
stm32_qspi_wait_poll_status()

Patrice Chotard (3):
spi: spi-mem: add automatic poll status functions
mtd: spinand: use the spi-mem poll status APIs
spi: stm32-qspi: add automatic poll status feature

drivers/mtd/nand/spi/core.c | 45 +++++++++++++------
drivers/spi/spi-mem.c | 85 ++++++++++++++++++++++++++++++++++++
drivers/spi/spi-stm32-qspi.c | 83 +++++++++++++++++++++++++++++++----
include/linux/mtd/spinand.h | 11 ++++-
include/linux/spi/spi-mem.h | 14 ++++++
5 files changed, 216 insertions(+), 22 deletions(-)

--
2.17.1



2021-05-19 17:41:07

by Patrice CHOTARD

[permalink] [raw]
Subject: [PATCH v3 2/3] mtd: spinand: use the spi-mem poll status APIs

From: Patrice Chotard <[email protected]>

Make use of spi-mem poll status APIs to let advanced controllers
optimize wait operations.

Signed-off-by: Patrice Chotard <[email protected]>
Signed-off-by: Christophe Kerello <[email protected]>
---
Changes in v3:
- Add initial_delay_us and polling_delay_us parameters to spinand_wait()
- Add SPINAND_READ/WRITE/ERASE/RESET_INITIAL_DELAY_US and
SPINAND_READ/WRITE/ERASE/RESET_POLL_DELAY_US defines.

Changes in v2:
- non-offload case is now managed by spi_mem_poll_status()

drivers/mtd/nand/spi/core.c | 45 ++++++++++++++++++++++++++-----------
include/linux/mtd/spinand.h | 11 ++++++++-
2 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index 17f63f95f4a2..ef2a692ab5b6 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -473,20 +473,26 @@ static int spinand_erase_op(struct spinand_device *spinand,
return spi_mem_exec_op(spinand->spimem, &op);
}

-static int spinand_wait(struct spinand_device *spinand, u8 *s)
+static int spinand_wait(struct spinand_device *spinand,
+ unsigned long initial_delay_us,
+ unsigned long poll_delay_us,
+ u8 *s)
{
- unsigned long timeo = jiffies + msecs_to_jiffies(400);
+ struct spi_mem_op op = SPINAND_GET_FEATURE_OP(REG_STATUS,
+ spinand->scratchbuf);
u8 status;
int ret;

- do {
- ret = spinand_read_status(spinand, &status);
- if (ret)
- return ret;
+ ret = spi_mem_poll_status(spinand->spimem, &op, STATUS_BUSY, 0,
+ initial_delay_us,
+ poll_delay_us,
+ SPINAND_STATUS_TIMEOUT_MS);
+ if (ret)
+ return ret;

- if (!(status & STATUS_BUSY))
- goto out;
- } while (time_before(jiffies, timeo));
+ status = *spinand->scratchbuf;
+ if (!(status & STATUS_BUSY))
+ goto out;

/*
* Extra read, just in case the STATUS_READY bit has changed
@@ -526,7 +532,10 @@ static int spinand_reset_op(struct spinand_device *spinand)
if (ret)
return ret;

- return spinand_wait(spinand, NULL);
+ return spinand_wait(spinand,
+ SPINAND_RESET_INITIAL_DELAY_US,
+ SPINAND_RESET_POLL_DELAY_US,
+ NULL);
}

static int spinand_lock_block(struct spinand_device *spinand, u8 lock)
@@ -549,7 +558,10 @@ static int spinand_read_page(struct spinand_device *spinand,
if (ret)
return ret;

- ret = spinand_wait(spinand, &status);
+ ret = spinand_wait(spinand,
+ SPINAND_READ_INITIAL_DELAY_US,
+ SPINAND_READ_POLL_DELAY_US,
+ &status);
if (ret < 0)
return ret;

@@ -585,7 +597,10 @@ static int spinand_write_page(struct spinand_device *spinand,
if (ret)
return ret;

- ret = spinand_wait(spinand, &status);
+ ret = spinand_wait(spinand,
+ SPINAND_WRITE_INITIAL_DELAY_US,
+ SPINAND_WRITE_POLL_DELAY_US,
+ &status);
if (!ret && (status & STATUS_PROG_FAILED))
return -EIO;

@@ -768,7 +783,11 @@ static int spinand_erase(struct nand_device *nand, const struct nand_pos *pos)
if (ret)
return ret;

- ret = spinand_wait(spinand, &status);
+ ret = spinand_wait(spinand,
+ SPINAND_ERASE_INITIAL_DELAY_US,
+ SPINAND_ERASE_POLL_DELAY_US,
+ &status);
+
if (!ret && (status & STATUS_ERASE_FAILED))
ret = -EIO;

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 6bb92f26833e..180c1fa64e62 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -169,7 +169,16 @@
struct spinand_op;
struct spinand_device;

-#define SPINAND_MAX_ID_LEN 4
+#define SPINAND_MAX_ID_LEN 4
+#define SPINAND_READ_INITIAL_DELAY_US 6
+#define SPINAND_READ_POLL_DELAY_US 5
+#define SPINAND_RESET_INITIAL_DELAY_US 5
+#define SPINAND_RESET_POLL_DELAY_US 5
+#define SPINAND_WRITE_INITIAL_DELAY_US 75
+#define SPINAND_WRITE_POLL_DELAY_US 15
+#define SPINAND_ERASE_INITIAL_DELAY_US 250
+#define SPINAND_ERASE_POLL_DELAY_US 50
+#define SPINAND_STATUS_TIMEOUT_MS 400

/**
* struct spinand_id - SPI NAND id structure
--
2.17.1


2021-05-19 17:42:25

by Patrice CHOTARD

[permalink] [raw]
Subject: [PATCH v3 3/3] spi: stm32-qspi: add automatic poll status feature

From: Patrice Chotard <[email protected]>

STM32 QSPI is able to automatically poll a specified register inside the
memory and relieve the CPU from this task.

As an example, when erasing a large memory area, we got a CPU load
of 50%. This patch allows the same operation to be performed
with a CPU load of around 2%.

Signed-off-by: Christophe Kerello <[email protected]>
Signed-off-by: Patrice Chotard <[email protected]>
---
Changes in v3:
- Remove spi_mem_finalize_op() API added in v2.

Changes in v2:
- mask and match stm32_qspi_poll_status() parameters are 2-byte wide
- Make usage of new spi_mem_finalize_op() API in stm32_qspi_wait_poll_status()

drivers/spi/spi-stm32-qspi.c | 83 ++++++++++++++++++++++++++++++++----
1 file changed, 75 insertions(+), 8 deletions(-)

diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c
index 7e640ccc7e77..01168a859005 100644
--- a/drivers/spi/spi-stm32-qspi.c
+++ b/drivers/spi/spi-stm32-qspi.c
@@ -36,6 +36,7 @@
#define CR_FTIE BIT(18)
#define CR_SMIE BIT(19)
#define CR_TOIE BIT(20)
+#define CR_APMS BIT(22)
#define CR_PRESC_MASK GENMASK(31, 24)

#define QSPI_DCR 0x04
@@ -53,6 +54,7 @@
#define QSPI_FCR 0x0c
#define FCR_CTEF BIT(0)
#define FCR_CTCF BIT(1)
+#define FCR_CSMF BIT(3)

#define QSPI_DLR 0x10

@@ -107,6 +109,7 @@ struct stm32_qspi {
u32 clk_rate;
struct stm32_qspi_flash flash[STM32_QSPI_MAX_NORCHIP];
struct completion data_completion;
+ struct completion match_completion;
u32 fmode;

struct dma_chan *dma_chtx;
@@ -115,6 +118,7 @@ struct stm32_qspi {

u32 cr_reg;
u32 dcr_reg;
+ unsigned long status_timeout;

/*
* to protect device configuration, could be different between
@@ -128,11 +132,20 @@ static irqreturn_t stm32_qspi_irq(int irq, void *dev_id)
struct stm32_qspi *qspi = (struct stm32_qspi *)dev_id;
u32 cr, sr;

+ cr = readl_relaxed(qspi->io_base + QSPI_CR);
sr = readl_relaxed(qspi->io_base + QSPI_SR);

+ if (cr & CR_SMIE && sr & SR_SMF) {
+ /* disable irq */
+ cr &= ~CR_SMIE;
+ writel_relaxed(cr, qspi->io_base + QSPI_CR);
+ complete(&qspi->match_completion);
+
+ return IRQ_HANDLED;
+ }
+
if (sr & (SR_TEF | SR_TCF)) {
/* disable irq */
- cr = readl_relaxed(qspi->io_base + QSPI_CR);
cr &= ~CR_TCIE & ~CR_TEIE;
writel_relaxed(cr, qspi->io_base + QSPI_CR);
complete(&qspi->data_completion);
@@ -319,6 +332,24 @@ static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi,
return err;
}

+static int stm32_qspi_wait_poll_status(struct stm32_qspi *qspi,
+ const struct spi_mem_op *op)
+{
+ u32 cr;
+
+ reinit_completion(&qspi->match_completion);
+ cr = readl_relaxed(qspi->io_base + QSPI_CR);
+ writel_relaxed(cr | CR_SMIE, qspi->io_base + QSPI_CR);
+
+ if (!wait_for_completion_timeout(&qspi->match_completion,
+ msecs_to_jiffies(qspi->status_timeout)))
+ return -ETIMEDOUT;
+
+ writel_relaxed(FCR_CSMF, qspi->io_base + QSPI_FCR);
+
+ return 0;
+}
+
static int stm32_qspi_get_mode(struct stm32_qspi *qspi, u8 buswidth)
{
if (buswidth == 4)
@@ -332,7 +363,7 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
struct stm32_qspi_flash *flash = &qspi->flash[mem->spi->chip_select];
u32 ccr, cr;
- int timeout, err = 0;
+ int timeout, err = 0, err_poll_status = 0;

dev_dbg(qspi->dev, "cmd:%#x mode:%d.%d.%d.%d addr:%#llx len:%#x\n",
op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
@@ -378,6 +409,9 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
if (op->addr.nbytes && qspi->fmode != CCR_FMODE_MM)
writel_relaxed(op->addr.val, qspi->io_base + QSPI_AR);

+ if (qspi->fmode == CCR_FMODE_APM)
+ err_poll_status = stm32_qspi_wait_poll_status(qspi, op);
+
err = stm32_qspi_tx(qspi, op);

/*
@@ -387,7 +421,7 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
* byte of device (device size - fifo size). like device size is not
* knows, the prefetching is always stop.
*/
- if (err || qspi->fmode == CCR_FMODE_MM)
+ if (err || err_poll_status || qspi->fmode == CCR_FMODE_MM)
goto abort;

/* wait end of tx in indirect mode */
@@ -406,15 +440,46 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
cr, !(cr & CR_ABORT), 1,
STM32_ABT_TIMEOUT_US);

- writel_relaxed(FCR_CTCF, qspi->io_base + QSPI_FCR);
+ writel_relaxed(FCR_CTCF | FCR_CSMF, qspi->io_base + QSPI_FCR);

- if (err || timeout)
- dev_err(qspi->dev, "%s err:%d abort timeout:%d\n",
- __func__, err, timeout);
+ if (err || err_poll_status || timeout)
+ dev_err(qspi->dev, "%s err:%d err_poll_status:%d abort timeout:%d\n",
+ __func__, err, err_poll_status, timeout);

return err;
}

+static int stm32_qspi_poll_status(struct spi_mem *mem, const struct spi_mem_op *op,
+ u16 mask, u16 match,
+ unsigned long initial_delay_us,
+ unsigned long polling_rate_us,
+ unsigned long timeout_ms)
+{
+ struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
+ int ret;
+
+ ret = pm_runtime_get_sync(qspi->dev);
+ if (ret < 0) {
+ pm_runtime_put_noidle(qspi->dev);
+ return ret;
+ }
+
+ mutex_lock(&qspi->lock);
+
+ writel_relaxed(mask, qspi->io_base + QSPI_PSMKR);
+ writel_relaxed(match, qspi->io_base + QSPI_PSMAR);
+ qspi->fmode = CCR_FMODE_APM;
+ qspi->status_timeout = timeout_ms;
+
+ ret = stm32_qspi_send(mem, op);
+ mutex_unlock(&qspi->lock);
+
+ pm_runtime_mark_last_busy(qspi->dev);
+ pm_runtime_put_autosuspend(qspi->dev);
+
+ return ret;
+}
+
static int stm32_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
{
struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
@@ -527,7 +592,7 @@ static int stm32_qspi_setup(struct spi_device *spi)
flash->presc = presc;

mutex_lock(&qspi->lock);
- qspi->cr_reg = 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN;
+ qspi->cr_reg = CR_APMS | 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN;
writel_relaxed(qspi->cr_reg, qspi->io_base + QSPI_CR);

/* set dcr fsize to max address */
@@ -607,6 +672,7 @@ static const struct spi_controller_mem_ops stm32_qspi_mem_ops = {
.exec_op = stm32_qspi_exec_op,
.dirmap_create = stm32_qspi_dirmap_create,
.dirmap_read = stm32_qspi_dirmap_read,
+ .poll_status = stm32_qspi_poll_status,
};

static int stm32_qspi_probe(struct platform_device *pdev)
@@ -661,6 +727,7 @@ static int stm32_qspi_probe(struct platform_device *pdev)
}

init_completion(&qspi->data_completion);
+ init_completion(&qspi->match_completion);

qspi->clk = devm_clk_get(dev, NULL);
if (IS_ERR(qspi->clk)) {
--
2.17.1


2021-05-19 17:56:25

by Patrice CHOTARD

[permalink] [raw]
Subject: Re: [PATCH v3 0/3] MTD: spinand: Add spi_mem_poll_status() support

I just saw a missing update; I am sending a v4.
Sorry,
Patrice

On 5/18/21 11:39 AM, [email protected] wrote:
> From: Patrice Chotard <[email protected]>
>
> This series adds support for the spi_mem_poll_status() spinand
> interface.
> Some QSPI controllers allows to poll automatically memory
> status during operations (erase, read or write). This allows to
> offload the CPU for this task.
> STM32 QSPI is supporting this feature, driver update are also
> part of this series.
>
> Chnages in v3:
> - Add spi_mem_read_status() which allows to read 8 or 16 bits status.
> - Add initial_delay_us and polling_delay_us parameters to spi_mem_poll_status().
> and also to poll_status() callback.
> - Move spi_mem_supports_op() in SW-based polling case.
> - Add delay before invoquing read_poll_timeout().
> - Remove the reinit/wait_for_completion() added in v2.
> - Add initial_delay_us and polling_delay_us parameters to spinand_wait().
> - Add SPINAND_READ/WRITE/ERASE/RESET_INITIAL_DELAY_US and
> SPINAND_READ/WRITE/ERASE/RESET_POLL_DELAY_US defines.
> - Remove spi_mem_finalize_op() API added in v2.
>
> Changes in v2:
> - Indicates the spi_mem_poll_status() timeout unit
> - Use 2-byte wide status register
> - Add spi_mem_supports_op() call in spi_mem_poll_status()
> - Add completion management in spi_mem_poll_status()
> - Add offload/non-offload case management in spi_mem_poll_status()
> - Optimize the non-offload case by using read_poll_timeout()
> - mask and match stm32_qspi_poll_status()'s parameters are 2-byte wide
> - Make usage of new spi_mem_finalize_op() API in
> stm32_qspi_wait_poll_status()
>
> Patrice Chotard (3):
> spi: spi-mem: add automatic poll status functions
> mtd: spinand: use the spi-mem poll status APIs
> spi: stm32-qspi: add automatic poll status feature
>
> drivers/mtd/nand/spi/core.c | 45 +++++++++++++------
> drivers/spi/spi-mem.c | 85 ++++++++++++++++++++++++++++++++++++
> drivers/spi/spi-stm32-qspi.c | 83 +++++++++++++++++++++++++++++++----
> include/linux/mtd/spinand.h | 11 ++++-
> include/linux/spi/spi-mem.h | 14 ++++++
> 5 files changed, 216 insertions(+), 22 deletions(-)
>

2021-05-19 17:56:53

by Boris Brezillon

[permalink] [raw]
Subject: Re: [PATCH v3 2/3] mtd: spinand: use the spi-mem poll status APIs

On Tue, 18 May 2021 11:39:50 +0200
<[email protected]> wrote:

> From: Patrice Chotard <[email protected]>
>
> Make use of spi-mem poll status APIs to let advanced controllers
> optimize wait operations.

This should also fix the high CPU usage you were reporting for those
that don't have a dedicated STATUS poll block logic, which is great!

>
> Signed-off-by: Patrice Chotard <[email protected]>
> Signed-off-by: Christophe Kerello <[email protected]>
> ---
> Changes in v3:
> - Add initial_delay_us and polling_delay_us parameters to spinand_wait()
> - Add SPINAND_READ/WRITE/ERASE/RESET_INITIAL_DELAY_US and
> SPINAND_READ/WRITE/ERASE/RESET_POLL_DELAY_US defines.
>
> Changes in v2:
> - non-offload case is now managed by spi_mem_poll_status()
>
> drivers/mtd/nand/spi/core.c | 45 ++++++++++++++++++++++++++-----------
> include/linux/mtd/spinand.h | 11 ++++++++-
> 2 files changed, 42 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
> index 17f63f95f4a2..ef2a692ab5b6 100644
> --- a/drivers/mtd/nand/spi/core.c
> +++ b/drivers/mtd/nand/spi/core.c
> @@ -473,20 +473,26 @@ static int spinand_erase_op(struct spinand_device *spinand,
> return spi_mem_exec_op(spinand->spimem, &op);
> }
>
> -static int spinand_wait(struct spinand_device *spinand, u8 *s)
> +static int spinand_wait(struct spinand_device *spinand,
> + unsigned long initial_delay_us,
> + unsigned long poll_delay_us,
> + u8 *s)
> {
> - unsigned long timeo = jiffies + msecs_to_jiffies(400);
> + struct spi_mem_op op = SPINAND_GET_FEATURE_OP(REG_STATUS,
> + spinand->scratchbuf);
> u8 status;
> int ret;
>
> - do {
> - ret = spinand_read_status(spinand, &status);
> - if (ret)
> - return ret;
> + ret = spi_mem_poll_status(spinand->spimem, &op, STATUS_BUSY, 0,
> + initial_delay_us,
> + poll_delay_us,
> + SPINAND_STATUS_TIMEOUT_MS);
> + if (ret)
> + return ret;
>
> - if (!(status & STATUS_BUSY))
> - goto out;
> - } while (time_before(jiffies, timeo));
> + status = *spinand->scratchbuf;
> + if (!(status & STATUS_BUSY))
> + goto out;
>
> /*
> * Extra read, just in case the STATUS_READY bit has changed
> @@ -526,7 +532,10 @@ static int spinand_reset_op(struct spinand_device *spinand)
> if (ret)
> return ret;
>
> - return spinand_wait(spinand, NULL);
> + return spinand_wait(spinand,
> + SPINAND_RESET_INITIAL_DELAY_US,
> + SPINAND_RESET_POLL_DELAY_US,
> + NULL);
> }
>
> static int spinand_lock_block(struct spinand_device *spinand, u8 lock)
> @@ -549,7 +558,10 @@ static int spinand_read_page(struct spinand_device *spinand,
> if (ret)
> return ret;
>
> - ret = spinand_wait(spinand, &status);
> + ret = spinand_wait(spinand,
> + SPINAND_READ_INITIAL_DELAY_US,
> + SPINAND_READ_POLL_DELAY_US,
> + &status);
> if (ret < 0)
> return ret;
>
> @@ -585,7 +597,10 @@ static int spinand_write_page(struct spinand_device *spinand,
> if (ret)
> return ret;
>
> - ret = spinand_wait(spinand, &status);
> + ret = spinand_wait(spinand,
> + SPINAND_WRITE_INITIAL_DELAY_US,
> + SPINAND_WRITE_POLL_DELAY_US,
> + &status);
> if (!ret && (status & STATUS_PROG_FAILED))
> return -EIO;
>
> @@ -768,7 +783,11 @@ static int spinand_erase(struct nand_device *nand, const struct nand_pos *pos)
> if (ret)
> return ret;
>
> - ret = spinand_wait(spinand, &status);
> + ret = spinand_wait(spinand,
> + SPINAND_ERASE_INITIAL_DELAY_US,
> + SPINAND_ERASE_POLL_DELAY_US,
> + &status);
> +
> if (!ret && (status & STATUS_ERASE_FAILED))
> ret = -EIO;
>
> diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
> index 6bb92f26833e..180c1fa64e62 100644
> --- a/include/linux/mtd/spinand.h
> +++ b/include/linux/mtd/spinand.h
> @@ -169,7 +169,16 @@
> struct spinand_op;
> struct spinand_device;
>
> -#define SPINAND_MAX_ID_LEN 4
> +#define SPINAND_MAX_ID_LEN 4
> +#define SPINAND_READ_INITIAL_DELAY_US 6
> +#define SPINAND_READ_POLL_DELAY_US 5
> +#define SPINAND_RESET_INITIAL_DELAY_US 5
> +#define SPINAND_RESET_POLL_DELAY_US 5
> +#define SPINAND_WRITE_INITIAL_DELAY_US 75
> +#define SPINAND_WRITE_POLL_DELAY_US 15
> +#define SPINAND_ERASE_INITIAL_DELAY_US 250
> +#define SPINAND_ERASE_POLL_DELAY_US 50

Could you add a comment explaining where those numbers come from?

> +#define SPINAND_STATUS_TIMEOUT_MS 400

I would name that one SPINAND_WAITRDY_TIMEOUT_MS.

>
> /**
> * struct spinand_id - SPI NAND id structure


2021-05-19 17:59:06

by Boris Brezillon

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] spi: stm32-qspi: add automatic poll status feature

On Tue, 18 May 2021 11:39:51 +0200
<[email protected]> wrote:

> From: Patrice Chotard <[email protected]>
>
> STM32 QSPI is able to automatically poll a specified register inside the
> memory and relieve the CPU from this task.
>
> As example, when erasing a large memory area, we got cpu load
> equal to 50%. This patch allows to perform the same operation
> with a cpu load around 2%.
>
> Signed-off-by: Christophe Kerello <[email protected]>
> Signed-off-by: Patrice Chotard <[email protected]>
> ---
> Changes in v3:
> - Remove spi_mem_finalize_op() API added in v2.
>
> Changes in v2:
> - mask and match stm32_qspi_poll_status() parameters are 2-byte wide
> - Make usage of new spi_mem_finalize_op() API in stm32_qspi_wait_poll_status()
>
> drivers/spi/spi-stm32-qspi.c | 83 ++++++++++++++++++++++++++++++++----
> 1 file changed, 75 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c
> index 7e640ccc7e77..01168a859005 100644
> --- a/drivers/spi/spi-stm32-qspi.c
> +++ b/drivers/spi/spi-stm32-qspi.c
> @@ -36,6 +36,7 @@
> #define CR_FTIE BIT(18)
> #define CR_SMIE BIT(19)
> #define CR_TOIE BIT(20)
> +#define CR_APMS BIT(22)
> #define CR_PRESC_MASK GENMASK(31, 24)
>
> #define QSPI_DCR 0x04
> @@ -53,6 +54,7 @@
> #define QSPI_FCR 0x0c
> #define FCR_CTEF BIT(0)
> #define FCR_CTCF BIT(1)
> +#define FCR_CSMF BIT(3)
>
> #define QSPI_DLR 0x10
>
> @@ -107,6 +109,7 @@ struct stm32_qspi {
> u32 clk_rate;
> struct stm32_qspi_flash flash[STM32_QSPI_MAX_NORCHIP];
> struct completion data_completion;
> + struct completion match_completion;
> u32 fmode;
>
> struct dma_chan *dma_chtx;
> @@ -115,6 +118,7 @@ struct stm32_qspi {
>
> u32 cr_reg;
> u32 dcr_reg;
> + unsigned long status_timeout;
>
> /*
> * to protect device configuration, could be different between
> @@ -128,11 +132,20 @@ static irqreturn_t stm32_qspi_irq(int irq, void *dev_id)
> struct stm32_qspi *qspi = (struct stm32_qspi *)dev_id;
> u32 cr, sr;
>
> + cr = readl_relaxed(qspi->io_base + QSPI_CR);
> sr = readl_relaxed(qspi->io_base + QSPI_SR);
>
> + if (cr & CR_SMIE && sr & SR_SMF) {
> + /* disable irq */
> + cr &= ~CR_SMIE;
> + writel_relaxed(cr, qspi->io_base + QSPI_CR);
> + complete(&qspi->match_completion);
> +
> + return IRQ_HANDLED;
> + }
> +
> if (sr & (SR_TEF | SR_TCF)) {
> /* disable irq */
> - cr = readl_relaxed(qspi->io_base + QSPI_CR);
> cr &= ~CR_TCIE & ~CR_TEIE;
> writel_relaxed(cr, qspi->io_base + QSPI_CR);
> complete(&qspi->data_completion);
> @@ -319,6 +332,24 @@ static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi,
> return err;
> }
>
> +static int stm32_qspi_wait_poll_status(struct stm32_qspi *qspi,
> + const struct spi_mem_op *op)
> +{
> + u32 cr;
> +
> + reinit_completion(&qspi->match_completion);
> + cr = readl_relaxed(qspi->io_base + QSPI_CR);
> + writel_relaxed(cr | CR_SMIE, qspi->io_base + QSPI_CR);
> +
> + if (!wait_for_completion_timeout(&qspi->match_completion,
> + msecs_to_jiffies(qspi->status_timeout)))
> + return -ETIMEDOUT;
> +
> + writel_relaxed(FCR_CSMF, qspi->io_base + QSPI_FCR);
> +
> + return 0;
> +}
> +
> static int stm32_qspi_get_mode(struct stm32_qspi *qspi, u8 buswidth)
> {
> if (buswidth == 4)
> @@ -332,7 +363,7 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
> struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
> struct stm32_qspi_flash *flash = &qspi->flash[mem->spi->chip_select];
> u32 ccr, cr;
> - int timeout, err = 0;
> + int timeout, err = 0, err_poll_status = 0;
>
> dev_dbg(qspi->dev, "cmd:%#x mode:%d.%d.%d.%d addr:%#llx len:%#x\n",
> op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
> @@ -378,6 +409,9 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
> if (op->addr.nbytes && qspi->fmode != CCR_FMODE_MM)
> writel_relaxed(op->addr.val, qspi->io_base + QSPI_AR);
>
> + if (qspi->fmode == CCR_FMODE_APM)
> + err_poll_status = stm32_qspi_wait_poll_status(qspi, op);
> +
> err = stm32_qspi_tx(qspi, op);
>
> /*
> @@ -387,7 +421,7 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
> * byte of device (device size - fifo size). like device size is not
> * knows, the prefetching is always stop.
> */
> - if (err || qspi->fmode == CCR_FMODE_MM)
> + if (err || err_poll_status || qspi->fmode == CCR_FMODE_MM)
> goto abort;
>
> /* wait end of tx in indirect mode */
> @@ -406,15 +440,46 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
> cr, !(cr & CR_ABORT), 1,
> STM32_ABT_TIMEOUT_US);
>
> - writel_relaxed(FCR_CTCF, qspi->io_base + QSPI_FCR);
> + writel_relaxed(FCR_CTCF | FCR_CSMF, qspi->io_base + QSPI_FCR);
>
> - if (err || timeout)
> - dev_err(qspi->dev, "%s err:%d abort timeout:%d\n",
> - __func__, err, timeout);
> + if (err || err_poll_status || timeout)
> + dev_err(qspi->dev, "%s err:%d err_poll_status:%d abort timeout:%d\n",
> + __func__, err, err_poll_status, timeout);
>
> return err;
> }
>
> +static int stm32_qspi_poll_status(struct spi_mem *mem, const struct spi_mem_op *op,
> + u16 mask, u16 match,
> + unsigned long initial_delay_us,
> + unsigned long polling_rate_us,
> + unsigned long timeout_ms)
> +{
> + struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
> + int ret;
> +

Don't you have special constraints on the op that can be passed to a poll
status request (does it support more than 1 byte of status?)? If not, I
think you should at least call spi_mem_supports_op().

> + ret = pm_runtime_get_sync(qspi->dev);
> + if (ret < 0) {
> + pm_runtime_put_noidle(qspi->dev);
> + return ret;
> + }
> +
> + mutex_lock(&qspi->lock);
> +
> + writel_relaxed(mask, qspi->io_base + QSPI_PSMKR);
> + writel_relaxed(match, qspi->io_base + QSPI_PSMAR);
> + qspi->fmode = CCR_FMODE_APM;
> + qspi->status_timeout = timeout_ms;
> +
> + ret = stm32_qspi_send(mem, op);
> + mutex_unlock(&qspi->lock);
> +
> + pm_runtime_mark_last_busy(qspi->dev);
> + pm_runtime_put_autosuspend(qspi->dev);
> +
> + return ret;
> +}
> +
> static int stm32_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
> {
> struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
> @@ -527,7 +592,7 @@ static int stm32_qspi_setup(struct spi_device *spi)
> flash->presc = presc;
>
> mutex_lock(&qspi->lock);
> - qspi->cr_reg = 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN;
> + qspi->cr_reg = CR_APMS | 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN;
> writel_relaxed(qspi->cr_reg, qspi->io_base + QSPI_CR);
>
> /* set dcr fsize to max address */
> @@ -607,6 +672,7 @@ static const struct spi_controller_mem_ops stm32_qspi_mem_ops = {
> .exec_op = stm32_qspi_exec_op,
> .dirmap_create = stm32_qspi_dirmap_create,
> .dirmap_read = stm32_qspi_dirmap_read,
> + .poll_status = stm32_qspi_poll_status,
> };
>
> static int stm32_qspi_probe(struct platform_device *pdev)
> @@ -661,6 +727,7 @@ static int stm32_qspi_probe(struct platform_device *pdev)
> }
>
> init_completion(&qspi->data_completion);
> + init_completion(&qspi->match_completion);
>
> qspi->clk = devm_clk_get(dev, NULL);
> if (IS_ERR(qspi->clk)) {


2021-05-19 18:00:09

by Patrice CHOTARD

[permalink] [raw]
Subject: Re: [PATCH v3 2/3] mtd: spinand: use the spi-mem poll status APIs

Hi

On 5/18/21 1:28 PM, Boris Brezillon wrote:
> On Tue, 18 May 2021 11:39:50 +0200
> <[email protected]> wrote:
>
>> From: Patrice Chotard <[email protected]>
>>
>> Make use of spi-mem poll status APIs to let advanced controllers
>> optimize wait operations.
>
> This should also fix the high CPU usage you were reporting for those
> that don't have a dedicated STATUS poll block logic, which is great!

I will update the commit message by indicating what you mention here.

>
>>
>> Signed-off-by: Patrice Chotard <[email protected]>
>> Signed-off-by: Christophe Kerello <[email protected]>
>> ---
>> Changes in v3:
>> - Add initial_delay_us and polling_delay_us parameters to spinand_wait()
>> - Add SPINAND_READ/WRITE/ERASE/RESET_INITIAL_DELAY_US and
>> SPINAND_READ/WRITE/ERASE/RESET_POLL_DELAY_US defines.
>>
>> Changes in v2:
>> - non-offload case is now managed by spi_mem_poll_status()
>>
>> drivers/mtd/nand/spi/core.c | 45 ++++++++++++++++++++++++++-----------
>> include/linux/mtd/spinand.h | 11 ++++++++-
>> 2 files changed, 42 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
>> index 17f63f95f4a2..ef2a692ab5b6 100644
>> --- a/drivers/mtd/nand/spi/core.c
>> +++ b/drivers/mtd/nand/spi/core.c
>> @@ -473,20 +473,26 @@ static int spinand_erase_op(struct spinand_device *spinand,
>> return spi_mem_exec_op(spinand->spimem, &op);
>> }
>>
>> -static int spinand_wait(struct spinand_device *spinand, u8 *s)
>> +static int spinand_wait(struct spinand_device *spinand,
>> + unsigned long initial_delay_us,
>> + unsigned long poll_delay_us,
>> + u8 *s)
>> {
>> - unsigned long timeo = jiffies + msecs_to_jiffies(400);
>> + struct spi_mem_op op = SPINAND_GET_FEATURE_OP(REG_STATUS,
>> + spinand->scratchbuf);
>> u8 status;
>> int ret;
>>
>> - do {
>> - ret = spinand_read_status(spinand, &status);
>> - if (ret)
>> - return ret;
>> + ret = spi_mem_poll_status(spinand->spimem, &op, STATUS_BUSY, 0,
>> + initial_delay_us,
>> + poll_delay_us,
>> + SPINAND_STATUS_TIMEOUT_MS);
>> + if (ret)
>> + return ret;
>>
>> - if (!(status & STATUS_BUSY))
>> - goto out;
>> - } while (time_before(jiffies, timeo));
>> + status = *spinand->scratchbuf;
>> + if (!(status & STATUS_BUSY))
>> + goto out;
>>
>> /*
>> * Extra read, just in case the STATUS_READY bit has changed
>> @@ -526,7 +532,10 @@ static int spinand_reset_op(struct spinand_device *spinand)
>> if (ret)
>> return ret;
>>
>> - return spinand_wait(spinand, NULL);
>> + return spinand_wait(spinand,
>> + SPINAND_RESET_INITIAL_DELAY_US,
>> + SPINAND_RESET_POLL_DELAY_US,
>> + NULL);
>> }
>>
>> static int spinand_lock_block(struct spinand_device *spinand, u8 lock)
>> @@ -549,7 +558,10 @@ static int spinand_read_page(struct spinand_device *spinand,
>> if (ret)
>> return ret;
>>
>> - ret = spinand_wait(spinand, &status);
>> + ret = spinand_wait(spinand,
>> + SPINAND_READ_INITIAL_DELAY_US,
>> + SPINAND_READ_POLL_DELAY_US,
>> + &status);
>> if (ret < 0)
>> return ret;
>>
>> @@ -585,7 +597,10 @@ static int spinand_write_page(struct spinand_device *spinand,
>> if (ret)
>> return ret;
>>
>> - ret = spinand_wait(spinand, &status);
>> + ret = spinand_wait(spinand,
>> + SPINAND_WRITE_INITIAL_DELAY_US,
>> + SPINAND_WRITE_POLL_DELAY_US,
>> + &status);
>> if (!ret && (status & STATUS_PROG_FAILED))
>> return -EIO;
>>
>> @@ -768,7 +783,11 @@ static int spinand_erase(struct nand_device *nand, const struct nand_pos *pos)
>> if (ret)
>> return ret;
>>
>> - ret = spinand_wait(spinand, &status);
>> + ret = spinand_wait(spinand,
>> + SPINAND_ERASE_INITIAL_DELAY_US,
>> + SPINAND_ERASE_POLL_DELAY_US,
>> + &status);
>> +
>> if (!ret && (status & STATUS_ERASE_FAILED))
>> ret = -EIO;
>>
>> diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
>> index 6bb92f26833e..180c1fa64e62 100644
>> --- a/include/linux/mtd/spinand.h
>> +++ b/include/linux/mtd/spinand.h
>> @@ -169,7 +169,16 @@
>> struct spinand_op;
>> struct spinand_device;
>>
>> -#define SPINAND_MAX_ID_LEN 4
>> +#define SPINAND_MAX_ID_LEN 4
>> +#define SPINAND_READ_INITIAL_DELAY_US 6
>> +#define SPINAND_READ_POLL_DELAY_US 5
>> +#define SPINAND_RESET_INITIAL_DELAY_US 5
>> +#define SPINAND_RESET_POLL_DELAY_US 5
>> +#define SPINAND_WRITE_INITIAL_DELAY_US 75
>> +#define SPINAND_WRITE_POLL_DELAY_US 15
>> +#define SPINAND_ERASE_INITIAL_DELAY_US 250
>> +#define SPINAND_ERASE_POLL_DELAY_US 50
>
> Could you add a comment explaining where those numbers come from?

Sure

>
>> +#define SPINAND_STATUS_TIMEOUT_MS 400
>
> I would name that one SPINAND_WAITRDY_TIMEOUT_MS.
Ok

>
>>
>> /**
>> * struct spinand_id - SPI NAND id structure
>

Thanks
Patrice

2021-05-19 18:01:01

by Patrice CHOTARD

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] spi: stm32-qspi: add automatic poll status feature

Hi

On 5/18/21 1:39 PM, Boris Brezillon wrote:
> On Tue, 18 May 2021 11:39:51 +0200
> <[email protected]> wrote:
>
>> From: Patrice Chotard <[email protected]>
>>
>> STM32 QSPI is able to automatically poll a specified register inside the
>> memory and relieve the CPU from this task.
>>
>> As example, when erasing a large memory area, we got cpu load
>> equal to 50%. This patch allows to perform the same operation
>> with a cpu load around 2%.
>>
>> Signed-off-by: Christophe Kerello <[email protected]>
>> Signed-off-by: Patrice Chotard <[email protected]>
>> ---
>> Changes in v3:
>> - Remove spi_mem_finalize_op() API added in v2.
>>
>> Changes in v2:
>> - mask and match stm32_qspi_poll_status() parameters are 2-byte wide
>> - Make use of the new spi_mem_finalize_op() API in stm32_qspi_wait_poll_status()
>>
>> drivers/spi/spi-stm32-qspi.c | 83 ++++++++++++++++++++++++++++++++----
>> 1 file changed, 75 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c
>> index 7e640ccc7e77..01168a859005 100644
>> --- a/drivers/spi/spi-stm32-qspi.c
>> +++ b/drivers/spi/spi-stm32-qspi.c
>> @@ -36,6 +36,7 @@
>> #define CR_FTIE BIT(18)
>> #define CR_SMIE BIT(19)
>> #define CR_TOIE BIT(20)
>> +#define CR_APMS BIT(22)
>> #define CR_PRESC_MASK GENMASK(31, 24)
>>
>> #define QSPI_DCR 0x04
>> @@ -53,6 +54,7 @@
>> #define QSPI_FCR 0x0c
>> #define FCR_CTEF BIT(0)
>> #define FCR_CTCF BIT(1)
>> +#define FCR_CSMF BIT(3)
>>
>> #define QSPI_DLR 0x10
>>
>> @@ -107,6 +109,7 @@ struct stm32_qspi {
>> u32 clk_rate;
>> struct stm32_qspi_flash flash[STM32_QSPI_MAX_NORCHIP];
>> struct completion data_completion;
>> + struct completion match_completion;
>> u32 fmode;
>>
>> struct dma_chan *dma_chtx;
>> @@ -115,6 +118,7 @@ struct stm32_qspi {
>>
>> u32 cr_reg;
>> u32 dcr_reg;
>> + unsigned long status_timeout;
>>
>> /*
>> * to protect device configuration, could be different between
>> @@ -128,11 +132,20 @@ static irqreturn_t stm32_qspi_irq(int irq, void *dev_id)
>> struct stm32_qspi *qspi = (struct stm32_qspi *)dev_id;
>> u32 cr, sr;
>>
>> + cr = readl_relaxed(qspi->io_base + QSPI_CR);
>> sr = readl_relaxed(qspi->io_base + QSPI_SR);
>>
>> + if (cr & CR_SMIE && sr & SR_SMF) {
>> + /* disable irq */
>> + cr &= ~CR_SMIE;
>> + writel_relaxed(cr, qspi->io_base + QSPI_CR);
>> + complete(&qspi->match_completion);
>> +
>> + return IRQ_HANDLED;
>> + }
>> +
>> if (sr & (SR_TEF | SR_TCF)) {
>> /* disable irq */
>> - cr = readl_relaxed(qspi->io_base + QSPI_CR);
>> cr &= ~CR_TCIE & ~CR_TEIE;
>> writel_relaxed(cr, qspi->io_base + QSPI_CR);
>> complete(&qspi->data_completion);
>> @@ -319,6 +332,24 @@ static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi,
>> return err;
>> }
>>
>> +static int stm32_qspi_wait_poll_status(struct stm32_qspi *qspi,
>> + const struct spi_mem_op *op)
>> +{
>> + u32 cr;
>> +
>> + reinit_completion(&qspi->match_completion);
>> + cr = readl_relaxed(qspi->io_base + QSPI_CR);
>> + writel_relaxed(cr | CR_SMIE, qspi->io_base + QSPI_CR);
>> +
>> + if (!wait_for_completion_timeout(&qspi->match_completion,
>> + msecs_to_jiffies(qspi->status_timeout)))
>> + return -ETIMEDOUT;
>> +
>> + writel_relaxed(FCR_CSMF, qspi->io_base + QSPI_FCR);
>> +
>> + return 0;
>> +}
>> +
>> static int stm32_qspi_get_mode(struct stm32_qspi *qspi, u8 buswidth)
>> {
>> if (buswidth == 4)
>> @@ -332,7 +363,7 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
>> struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
>> struct stm32_qspi_flash *flash = &qspi->flash[mem->spi->chip_select];
>> u32 ccr, cr;
>> - int timeout, err = 0;
>> + int timeout, err = 0, err_poll_status = 0;
>>
>> dev_dbg(qspi->dev, "cmd:%#x mode:%d.%d.%d.%d addr:%#llx len:%#x\n",
>> op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
>> @@ -378,6 +409,9 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
>> if (op->addr.nbytes && qspi->fmode != CCR_FMODE_MM)
>> writel_relaxed(op->addr.val, qspi->io_base + QSPI_AR);
>>
>> + if (qspi->fmode == CCR_FMODE_APM)
>> + err_poll_status = stm32_qspi_wait_poll_status(qspi, op);
>> +
>> err = stm32_qspi_tx(qspi, op);
>>
>> /*
>> @@ -387,7 +421,7 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
>> * byte of device (device size - fifo size). like device size is not
>> * knows, the prefetching is always stop.
>> */
>> - if (err || qspi->fmode == CCR_FMODE_MM)
>> + if (err || err_poll_status || qspi->fmode == CCR_FMODE_MM)
>> goto abort;
>>
>> /* wait end of tx in indirect mode */
>> @@ -406,15 +440,46 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
>> cr, !(cr & CR_ABORT), 1,
>> STM32_ABT_TIMEOUT_US);
>>
>> - writel_relaxed(FCR_CTCF, qspi->io_base + QSPI_FCR);
>> + writel_relaxed(FCR_CTCF | FCR_CSMF, qspi->io_base + QSPI_FCR);
>>
>> - if (err || timeout)
>> - dev_err(qspi->dev, "%s err:%d abort timeout:%d\n",
>> - __func__, err, timeout);
>> + if (err || err_poll_status || timeout)
>> + dev_err(qspi->dev, "%s err:%d err_poll_status:%d abort timeout:%d\n",
>> + __func__, err, err_poll_status, timeout);
>>
>> return err;
>> }
>>
>> +static int stm32_qspi_poll_status(struct spi_mem *mem, const struct spi_mem_op *op,
>> + u16 mask, u16 match,
>> + unsigned long initial_delay_us,
>> + unsigned long polling_rate_us,
>> + unsigned long timeout_ms)
>> +{
>> + struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
>> + int ret;
>> +
>
> Don't you have special constraints on the op that can be passed to a poll
> status request (does it support more than 1 byte of status?)? If not, I
> think you should at least call spi_mem_supports_op().

We don't have special constraints: the STM32 QSPI supports a status of up to 32 bits.

>
>> + ret = pm_runtime_get_sync(qspi->dev);
>> + if (ret < 0) {
>> + pm_runtime_put_noidle(qspi->dev);
>> + return ret;
>> + }
>> +
>> + mutex_lock(&qspi->lock);
>> +
>> + writel_relaxed(mask, qspi->io_base + QSPI_PSMKR);
>> + writel_relaxed(match, qspi->io_base + QSPI_PSMAR);
>> + qspi->fmode = CCR_FMODE_APM;
>> + qspi->status_timeout = timeout_ms;
>> +
>> + ret = stm32_qspi_send(mem, op);
>> + mutex_unlock(&qspi->lock);
>> +
>> + pm_runtime_mark_last_busy(qspi->dev);
>> + pm_runtime_put_autosuspend(qspi->dev);
>> +
>> + return ret;
>> +}
>> +
>> static int stm32_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
>> {
>> struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
>> @@ -527,7 +592,7 @@ static int stm32_qspi_setup(struct spi_device *spi)
>> flash->presc = presc;
>>
>> mutex_lock(&qspi->lock);
>> - qspi->cr_reg = 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN;
>> + qspi->cr_reg = CR_APMS | 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN;
>> writel_relaxed(qspi->cr_reg, qspi->io_base + QSPI_CR);
>>
>> /* set dcr fsize to max address */
>> @@ -607,6 +672,7 @@ static const struct spi_controller_mem_ops stm32_qspi_mem_ops = {
>> .exec_op = stm32_qspi_exec_op,
>> .dirmap_create = stm32_qspi_dirmap_create,
>> .dirmap_read = stm32_qspi_dirmap_read,
>> + .poll_status = stm32_qspi_poll_status,
>> };
>>
>> static int stm32_qspi_probe(struct platform_device *pdev)
>> @@ -661,6 +727,7 @@ static int stm32_qspi_probe(struct platform_device *pdev)
>> }
>>
>> init_completion(&qspi->data_completion);
>> + init_completion(&qspi->match_completion);
>>
>> qspi->clk = devm_clk_get(dev, NULL);
>> if (IS_ERR(qspi->clk)) {
>