2023-04-19 06:32:19

by Jaewon Kim

[permalink] [raw]
Subject: [PATCH v2 0/4] Improves polling mode of s3c64xx driver

1.
s3cx64xx driver was supporting polling mode using quirk for SOC without DMA.
However, in order to use PIO mode as an optional rather than a quirk, when DMA
is not described, spi operates with pio mode rather than probe fail.

2.
Fixed the problem of high CPU usage in PIO mode.

3.
If the transfer data size is larger than 32-bit, IRQ base PIO mode used.


Jaewon Kim (4):
spi: s3c64xx: changed to PIO mode if there is no DMA
spi: s3c64xx: add cpu_relax in polling loop
spi: s3c64xx: add sleep during transfer
spi: s3c64xx: support interrupt based pio mode

drivers/spi/spi-s3c64xx.c | 85 ++++++++++++++++++++---
include/linux/platform_data/spi-s3c64xx.h | 1 +
2 files changed, 76 insertions(+), 10 deletions(-)

--
2.17.1


2023-04-19 06:35:52

by Jaewon Kim

[permalink] [raw]
Subject: [PATCH v2 2/4] spi: s3c64xx: add cpu_relax in polling loop

Adds cpu_relax() to prevent long busy-wait.
There is busy-wait loop to check data transfer completion in polling mode.

Signed-off-by: Jaewon Kim <[email protected]>
---
drivers/spi/spi-s3c64xx.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 273aa02322d9..886722fb40ea 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -568,6 +568,7 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,

val = msecs_to_loops(ms);
do {
+ cpu_relax();
status = readl(regs + S3C64XX_SPI_STATUS);
} while (RX_FIFO_LVL(status, sdd) < xfer->len && --val);

--
2.17.1

2023-04-19 06:37:28

by Jaewon Kim

[permalink] [raw]
Subject: [PATCH v2 1/4] spi: s3c64xx: changed to PIO mode if there is no DMA

Polling mode supported with qurik if there was no DMA in the SOC.
However, there are cased where we cannot or do not want to use DMA.
To support this case, if DMA is not set, it is switched to polling mode.

Signed-off-by: Jaewon Kim <[email protected]>
---
drivers/spi/spi-s3c64xx.c | 8 ++++++--
include/linux/platform_data/spi-s3c64xx.h | 1 +
2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 71d324ec9a70..273aa02322d9 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -19,7 +19,6 @@
#include <linux/platform_data/spi-s3c64xx.h>

#define MAX_SPI_PORTS 12
-#define S3C64XX_SPI_QUIRK_POLL (1 << 0)
#define S3C64XX_SPI_QUIRK_CS_AUTO (1 << 1)
#define AUTOSUSPEND_TIMEOUT 2000

@@ -116,7 +115,7 @@
#define S3C64XX_SPI_TRAILCNT S3C64XX_SPI_MAX_TRAILCNT

#define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
-#define is_polling(x) (x->port_conf->quirks & S3C64XX_SPI_QUIRK_POLL)
+#define is_polling(x) (x->cntrlr_info->polling)

#define RXBUSY (1<<2)
#define TXBUSY (1<<3)
@@ -1067,6 +1066,11 @@ static struct s3c64xx_spi_info *s3c64xx_spi_parse_dt(struct device *dev)
sci->num_cs = temp;
}

+ if (!of_find_property(dev->of_node, "dmas", NULL)) {
+ dev_warn(dev, "cannot find DMA, changed to PIO mode\n");
+ sci->polling = 1;
+ }
+
sci->no_cs = of_property_read_bool(dev->of_node, "no-cs-readback");

return sci;
diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h
index 5df1ace6d2c9..cb7b8ddc899f 100644
--- a/include/linux/platform_data/spi-s3c64xx.h
+++ b/include/linux/platform_data/spi-s3c64xx.h
@@ -35,6 +35,7 @@ struct s3c64xx_spi_info {
int src_clk_nr;
int num_cs;
bool no_cs;
+ bool polling;
int (*cfg_gpio)(void);
};

--
2.17.1

2023-04-19 06:46:43

by Jaewon Kim

[permalink] [raw]
Subject: [PATCH v2 4/4] spi: s3c64xx: support interrupt based pio mode

Interrupt based pio mode is supported to reduce CPU load.
If transfer size is larger than 32 byte, it is processed using interrupt.

Signed-off-by: Jaewon Kim <[email protected]>
---
drivers/spi/spi-s3c64xx.c | 82 ++++++++++++++++++++++++++++++++-------
1 file changed, 67 insertions(+), 15 deletions(-)

diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index cf3060b2639b..ce1afb9a4ed4 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -58,6 +58,8 @@
#define S3C64XX_SPI_MODE_BUS_TSZ_HALFWORD (1<<17)
#define S3C64XX_SPI_MODE_BUS_TSZ_WORD (2<<17)
#define S3C64XX_SPI_MODE_BUS_TSZ_MASK (3<<17)
+#define S3C64XX_SPI_MODE_RX_RDY_LVL GENMASK(16, 11)
+#define S3C64XX_SPI_MODE_RX_RDY_LVL_SHIFT 11
#define S3C64XX_SPI_MODE_SELF_LOOPBACK (1<<3)
#define S3C64XX_SPI_MODE_RXDMA_ON (1<<2)
#define S3C64XX_SPI_MODE_TXDMA_ON (1<<1)
@@ -114,6 +116,8 @@

#define S3C64XX_SPI_TRAILCNT S3C64XX_SPI_MAX_TRAILCNT

+#define S3C64XX_SPI_POLLING_SIZE 32
+
#define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
#define is_polling(x) (x->cntrlr_info->polling)

@@ -552,10 +556,11 @@ static int s3c64xx_wait_for_dma(struct s3c64xx_spi_driver_data *sdd,
}

static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
- struct spi_transfer *xfer)
+ struct spi_transfer *xfer, int use_irq)
{
void __iomem *regs = sdd->regs;
unsigned long val;
+ unsigned long time;
u32 status;
int loops;
u32 cpy_len;
@@ -563,17 +568,24 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
int ms;
u32 tx_time;

- /* sleep during signal transfer time */
- status = readl(regs + S3C64XX_SPI_STATUS);
- if (RX_FIFO_LVL(status, sdd) < xfer->len) {
- tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;
- usleep_range(tx_time / 2, tx_time);
- }
-
/* millisecs to xfer 'len' bytes @ 'cur_speed' */
ms = xfer->len * 8 * 1000 / sdd->cur_speed;
ms += 10; /* some tolerance */

+ if (use_irq) {
+ val = msecs_to_jiffies(ms);
+ time = wait_for_completion_timeout(&sdd->xfer_completion, val);
+ if (!time)
+ return -EIO;
+ } else {
+ /* sleep during signal transfer time */
+ status = readl(regs + S3C64XX_SPI_STATUS);
+ if (RX_FIFO_LVL(status, sdd) < xfer->len) {
+ tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;
+ usleep_range(tx_time / 2, tx_time);
+ }
+ }
+
val = msecs_to_loops(ms);
do {
cpu_relax();
@@ -737,10 +749,13 @@ static int s3c64xx_spi_transfer_one(struct spi_master *master,
void *rx_buf = NULL;
int target_len = 0, origin_len = 0;
int use_dma = 0;
+ int use_irq = 0;
int status;
u32 speed;
u8 bpw;
unsigned long flags;
+ u32 rdy_lv;
+ u32 val;

reinit_completion(&sdd->xfer_completion);

@@ -761,17 +776,46 @@ static int s3c64xx_spi_transfer_one(struct spi_master *master,
sdd->rx_dma.ch && sdd->tx_dma.ch) {
use_dma = 1;

- } else if (xfer->len > fifo_len) {
+ } else if (xfer->len >= fifo_len) {
tx_buf = xfer->tx_buf;
rx_buf = xfer->rx_buf;
origin_len = xfer->len;
-
target_len = xfer->len;
- if (xfer->len > fifo_len)
- xfer->len = fifo_len;
+ xfer->len = fifo_len - 1;
}

do {
+ /* transfer size is greater than 32, change to IRQ mode */
+ if (xfer->len > S3C64XX_SPI_POLLING_SIZE)
+ use_irq = 1;
+
+ if (use_irq) {
+ reinit_completion(&sdd->xfer_completion);
+
+ rdy_lv = xfer->len;
+ /* Setup RDY_FIFO trigger Level
+ * RDY_LVL =
+ * fifo_lvl up to 64 byte -> N bytes
+ * 128 byte -> RDY_LVL * 2 bytes
+ * 256 byte -> RDY_LVL * 4 bytes
+ */
+ if (fifo_len == 128)
+ rdy_lv /= 2;
+ else if (fifo_len == 256)
+ rdy_lv /= 4;
+
+ val = readl(sdd->regs + S3C64XX_SPI_MODE_CFG);
+ val &= ~S3C64XX_SPI_MODE_RX_RDY_LVL;
+ val |= (rdy_lv << S3C64XX_SPI_MODE_RX_RDY_LVL_SHIFT);
+ writel(val, sdd->regs + S3C64XX_SPI_MODE_CFG);
+
+ /* Enable FIFO_RDY_EN IRQ */
+ val = readl(sdd->regs + S3C64XX_SPI_INT_EN);
+ writel((val | S3C64XX_SPI_INT_RX_FIFORDY_EN),
+ sdd->regs + S3C64XX_SPI_INT_EN);
+
+ }
+
spin_lock_irqsave(&sdd->lock, flags);

/* Pending only which is to be done */
@@ -793,7 +837,7 @@ static int s3c64xx_spi_transfer_one(struct spi_master *master,
if (use_dma)
status = s3c64xx_wait_for_dma(sdd, xfer);
else
- status = s3c64xx_wait_for_pio(sdd, xfer);
+ status = s3c64xx_wait_for_pio(sdd, xfer, use_irq);

if (status) {
dev_err(&spi->dev,
@@ -832,8 +876,8 @@ static int s3c64xx_spi_transfer_one(struct spi_master *master,
if (xfer->rx_buf)
xfer->rx_buf += xfer->len;

- if (target_len > fifo_len)
- xfer->len = fifo_len;
+ if (target_len >= fifo_len)
+ xfer->len = fifo_len - 1;
else
xfer->len = target_len;
}
@@ -1003,6 +1047,14 @@ static irqreturn_t s3c64xx_spi_irq(int irq, void *data)
dev_err(&spi->dev, "TX underrun\n");
}

+ if (val & S3C64XX_SPI_ST_RX_FIFORDY) {
+ complete(&sdd->xfer_completion);
+ /* No pending clear irq, turn-off INT_EN_RX_FIFO_RDY */
+ val = readl(sdd->regs + S3C64XX_SPI_INT_EN);
+ writel((val & ~S3C64XX_SPI_INT_RX_FIFORDY_EN),
+ sdd->regs + S3C64XX_SPI_INT_EN);
+ }
+
/* Clear the pending irq by setting and then clearing it */
writel(clr, sdd->regs + S3C64XX_SPI_PENDING_CLR);
writel(0, sdd->regs + S3C64XX_SPI_PENDING_CLR);
--
2.17.1

2023-04-19 06:47:56

by Jaewon Kim

[permalink] [raw]
Subject: [PATCH v2 3/4] spi: s3c64xx: add sleep during transfer

In polling mode, the status register is constantly read to check transfer
completion. It cause excessive CPU usage.
So, it calculates the SPI transfer time and made it sleep.

Signed-off-by: Jaewon Kim <[email protected]>
---
drivers/spi/spi-s3c64xx.c | 8 ++++++++
1 file changed, 8 insertions(+)

diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 886722fb40ea..cf3060b2639b 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -561,6 +561,14 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
u32 cpy_len;
u8 *buf;
int ms;
+ u32 tx_time;
+
+ /* sleep during signal transfer time */
+ status = readl(regs + S3C64XX_SPI_STATUS);
+ if (RX_FIFO_LVL(status, sdd) < xfer->len) {
+ tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;
+ usleep_range(tx_time / 2, tx_time);
+ }

/* millisecs to xfer 'len' bytes @ 'cur_speed' */
ms = xfer->len * 8 * 1000 / sdd->cur_speed;
--
2.17.1

2023-04-19 08:06:12

by Krzysztof Kozlowski

[permalink] [raw]
Subject: Re: [PATCH v2 0/4] Improves polling mode of s3c64xx driver

On 19/04/2023 08:06, Jaewon Kim wrote:
> 1.
> s3cx64xx driver was supporting polling mode using quirk for SOC without DMA.
> However, in order to use PIO mode as an optional rather than a quirk, when DMA
> is not described, spi operates with pio mode rather than probe fail.
>
> 2.
> Fixed the problem of high CPU usage in PIO mode.
>
> 3.
> If the transfer data size is larger than 32-bit, IRQ base PIO mode used.
>

What changed in the patches? You need to provide changelog.

Best regards,
Krzysztof

2023-04-19 08:06:40

by Krzysztof Kozlowski

[permalink] [raw]
Subject: Re: [PATCH v2 1/4] spi: s3c64xx: changed to PIO mode if there is no DMA

On 19/04/2023 08:06, Jaewon Kim wrote:
> Polling mode supported with qurik if there was no DMA in the SOC.

typo: quirk
You missed verb in your first part of sentence. I don't understand it.

> However, there are cased where we cannot or do not want to use DMA.
> To support this case, if DMA is not set, it is switched to polling mode.
>
> Signed-off-by: Jaewon Kim <[email protected]>
> ---
> drivers/spi/spi-s3c64xx.c | 8 ++++++--
> include/linux/platform_data/spi-s3c64xx.h | 1 +
> 2 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
> index 71d324ec9a70..273aa02322d9 100644
> --- a/drivers/spi/spi-s3c64xx.c
> +++ b/drivers/spi/spi-s3c64xx.c
> @@ -19,7 +19,6 @@
> #include <linux/platform_data/spi-s3c64xx.h>
>
> #define MAX_SPI_PORTS 12
> -#define S3C64XX_SPI_QUIRK_POLL (1 << 0)
> #define S3C64XX_SPI_QUIRK_CS_AUTO (1 << 1)
> #define AUTOSUSPEND_TIMEOUT 2000
>
> @@ -116,7 +115,7 @@
> #define S3C64XX_SPI_TRAILCNT S3C64XX_SPI_MAX_TRAILCNT
>
> #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
> -#define is_polling(x) (x->port_conf->quirks & S3C64XX_SPI_QUIRK_POLL)
> +#define is_polling(x) (x->cntrlr_info->polling)
>
> #define RXBUSY (1<<2)
> #define TXBUSY (1<<3)
> @@ -1067,6 +1066,11 @@ static struct s3c64xx_spi_info *s3c64xx_spi_parse_dt(struct device *dev)
> sci->num_cs = temp;
> }
>
> + if (!of_find_property(dev->of_node, "dmas", NULL)) {
> + dev_warn(dev, "cannot find DMA, changed to PIO mode\n");

You said it is desired option, so should not be a warning. I would make
it debug at most.

> + sci->polling = 1;



Best regards,
Krzysztof

2023-04-19 08:28:12

by Krzysztof Kozlowski

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] spi: s3c64xx: support interrupt based pio mode

On 19/04/2023 08:06, Jaewon Kim wrote:
> Interrupt based pio mode is supported to reduce CPU load.
> If transfer size is larger than 32 byte, it is processed using interrupt.
>
> Signed-off-by: Jaewon Kim <[email protected]>
> ---
> drivers/spi/spi-s3c64xx.c | 82 ++++++++++++++++++++++++++++++++-------
> 1 file changed, 67 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
> index cf3060b2639b..ce1afb9a4ed4 100644
> --- a/drivers/spi/spi-s3c64xx.c
> +++ b/drivers/spi/spi-s3c64xx.c
> @@ -58,6 +58,8 @@
> #define S3C64XX_SPI_MODE_BUS_TSZ_HALFWORD (1<<17)
> #define S3C64XX_SPI_MODE_BUS_TSZ_WORD (2<<17)
> #define S3C64XX_SPI_MODE_BUS_TSZ_MASK (3<<17)
> +#define S3C64XX_SPI_MODE_RX_RDY_LVL GENMASK(16, 11)
> +#define S3C64XX_SPI_MODE_RX_RDY_LVL_SHIFT 11
> #define S3C64XX_SPI_MODE_SELF_LOOPBACK (1<<3)
> #define S3C64XX_SPI_MODE_RXDMA_ON (1<<2)
> #define S3C64XX_SPI_MODE_TXDMA_ON (1<<1)
> @@ -114,6 +116,8 @@
>
> #define S3C64XX_SPI_TRAILCNT S3C64XX_SPI_MAX_TRAILCNT
>
> +#define S3C64XX_SPI_POLLING_SIZE 32
> +
> #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
> #define is_polling(x) (x->cntrlr_info->polling)
>
> @@ -552,10 +556,11 @@ static int s3c64xx_wait_for_dma(struct s3c64xx_spi_driver_data *sdd,
> }
>
> static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
> - struct spi_transfer *xfer)
> + struct spi_transfer *xfer, int use_irq)
> {
> void __iomem *regs = sdd->regs;
> unsigned long val;
> + unsigned long time;
> u32 status;
> int loops;
> u32 cpy_len;
> @@ -563,17 +568,24 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
> int ms;
> u32 tx_time;
>
> - /* sleep during signal transfer time */
> - status = readl(regs + S3C64XX_SPI_STATUS);
> - if (RX_FIFO_LVL(status, sdd) < xfer->len) {
> - tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;
> - usleep_range(tx_time / 2, tx_time);
> - }

You just added this code. Adding and immediately removing it, suggests
this should be one patch.


Best regards,
Krzysztof

2023-04-19 08:31:07

by Krzysztof Kozlowski

[permalink] [raw]
Subject: Re: [PATCH v2 2/4] spi: s3c64xx: add cpu_relax in polling loop

On 19/04/2023 08:06, Jaewon Kim wrote:
> Adds cpu_relax() to prevent long busy-wait.

How cpu_relax prevents long waiting?

> There is busy-wait loop to check data transfer completion in polling mode.
>
> Signed-off-by: Jaewon Kim <[email protected]>
> ---
> drivers/spi/spi-s3c64xx.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
> index 273aa02322d9..886722fb40ea 100644
> --- a/drivers/spi/spi-s3c64xx.c
> +++ b/drivers/spi/spi-s3c64xx.c
> @@ -568,6 +568,7 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
>
> val = msecs_to_loops(ms);
> do {
> + cpu_relax();

Shouldn't this be just readl_poll_timeout()? Or the syntax would be too
complicated?

> status = readl(regs + S3C64XX_SPI_STATUS);
> } while (RX_FIFO_LVL(status, sdd) < xfer->len && --val);
>

Best regards,
Krzysztof

2023-04-19 08:31:17

by Krzysztof Kozlowski

[permalink] [raw]
Subject: Re: [PATCH v2 3/4] spi: s3c64xx: add sleep during transfer

On 19/04/2023 08:06, Jaewon Kim wrote:
> In polling mode, the status register is constantly read to check transfer
> completion. It cause excessive CPU usage.
> So, it calculates the SPI transfer time and made it sleep.
>
> Signed-off-by: Jaewon Kim <[email protected]>
> ---
> drivers/spi/spi-s3c64xx.c | 8 ++++++++
> 1 file changed, 8 insertions(+)
>
> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
> index 886722fb40ea..cf3060b2639b 100644
> --- a/drivers/spi/spi-s3c64xx.c
> +++ b/drivers/spi/spi-s3c64xx.c
> @@ -561,6 +561,14 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
> u32 cpy_len;
> u8 *buf;
> int ms;
> + u32 tx_time;
> +
> + /* sleep during signal transfer time */
> + status = readl(regs + S3C64XX_SPI_STATUS);
> + if (RX_FIFO_LVL(status, sdd) < xfer->len) {
> + tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;
> + usleep_range(tx_time / 2, tx_time);
> + }

Did you actually check the delays introduced by it? Is it worth?

>
> /* millisecs to xfer 'len' bytes @ 'cur_speed' */
> ms = xfer->len * 8 * 1000 / sdd->cur_speed;

You have now some code duplication so this could be combined.

Best regards,
Krzysztof

2023-04-19 08:34:51

by Jaewon Kim

[permalink] [raw]
Subject: Re: [PATCH v2 0/4] Improves polling mode of s3c64xx driver


On 23. 4. 19. 16:59, Krzysztof Kozlowski wrote:
> On 19/04/2023 08:06, Jaewon Kim wrote:
>> 1.
>> s3cx64xx driver was supporting polling mode using quirk for SOC without DMA.
>> However, in order to use PIO mode as an optional rather than a quirk, when DMA
>> is not described, spi operates with pio mode rather than probe fail.
>>
>> 2.
>> Fixed the problem of high CPU usage in PIO mode.
>>
>> 3.
>> If the transfer data size is larger than 32-bit, IRQ base PIO mode used.
>>
> What changed in the patches? You need to provide changelog.


Oh, I missed changes while copy/pasting.

I will add changes v2 from v3 together.

Changes in V2.
- DeviceTree property not used to change PIO mod.
- Add cpu_releax() in polling loop
- Add lower limit in IRQ mode


>
> Best regards,
> Krzysztof
>
>
Thanks

Jaewon Kim

2023-04-19 08:34:55

by Jaewon Kim

[permalink] [raw]
Subject: Re: [PATCH v2 1/4] spi: s3c64xx: changed to PIO mode if there is no DMA


On 23. 4. 19. 17:03, Krzysztof Kozlowski wrote:
> On 19/04/2023 08:06, Jaewon Kim wrote:
>> Polling mode supported with qurik if there was no DMA in the SOC.
> typo: quirk
> You missed verb in your first part of sentence. I don't understand it.

Sorry, I change this sentence like below.

Polling mode supported as a quirk for SOCs without DMA.

>
>> However, there are cased where we cannot or do not want to use DMA.
>> To support this case, if DMA is not set, it is switched to polling mode.
>>
>> Signed-off-by: Jaewon Kim <[email protected]>
>> ---
>> drivers/spi/spi-s3c64xx.c | 8 ++++++--
>> include/linux/platform_data/spi-s3c64xx.h | 1 +
>> 2 files changed, 7 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
>> index 71d324ec9a70..273aa02322d9 100644
>> --- a/drivers/spi/spi-s3c64xx.c
>> +++ b/drivers/spi/spi-s3c64xx.c
>> @@ -19,7 +19,6 @@
>> #include <linux/platform_data/spi-s3c64xx.h>
>>
>> #define MAX_SPI_PORTS 12
>> -#define S3C64XX_SPI_QUIRK_POLL (1 << 0)
>> #define S3C64XX_SPI_QUIRK_CS_AUTO (1 << 1)
>> #define AUTOSUSPEND_TIMEOUT 2000
>>
>> @@ -116,7 +115,7 @@
>> #define S3C64XX_SPI_TRAILCNT S3C64XX_SPI_MAX_TRAILCNT
>>
>> #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
>> -#define is_polling(x) (x->port_conf->quirks & S3C64XX_SPI_QUIRK_POLL)
>> +#define is_polling(x) (x->cntrlr_info->polling)
>>
>> #define RXBUSY (1<<2)
>> #define TXBUSY (1<<3)
>> @@ -1067,6 +1066,11 @@ static struct s3c64xx_spi_info *s3c64xx_spi_parse_dt(struct device *dev)
>> sci->num_cs = temp;
>> }
>>
>> + if (!of_find_property(dev->of_node, "dmas", NULL)) {
>> + dev_warn(dev, "cannot find DMA, changed to PIO mode\n");
> You said it is desired option, so should not be a warning. I would make
> it debug at most.
>
Okay, I will change dev_warn() to dev_dbg().


>> + sci->polling = 1;
>
>
> Best regards,
> Krzysztof
>
>

Thanks

Jaewon Kim

2023-04-19 08:37:28

by Krzysztof Kozlowski

[permalink] [raw]
Subject: Re: [PATCH v2 1/4] spi: s3c64xx: changed to PIO mode if there is no DMA

On 19/04/2023 10:31, Jaewon Kim wrote:
>
> On 23. 4. 19. 17:03, Krzysztof Kozlowski wrote:
>> On 19/04/2023 08:06, Jaewon Kim wrote:
>>> Polling mode supported with qurik if there was no DMA in the SOC.
>> typo: quirk
>> You missed verb in your first part of sentence. I don't understand it.
>
> Sorry, I change this sentence like below.
>
> Polling mode supported as a quirk for SOCs without DMA.

I would say still verb is missing as supported in past tense does not
make sense.

See:
https://elixir.bootlin.com/linux/v5.17.1/source/Documentation/process/submitting-patches.rst#L95

>


Best regards,
Krzysztof

2023-04-19 09:53:39

by Jaewon Kim

[permalink] [raw]
Subject: Re: [PATCH v2 3/4] spi: s3c64xx: add sleep during transfer


On 23. 4. 19. 17:19, Krzysztof Kozlowski wrote:
> On 19/04/2023 08:06, Jaewon Kim wrote:
>> In polling mode, the status register is constantly read to check transfer
>> completion. It cause excessive CPU usage.
>> So, it calculates the SPI transfer time and made it sleep.
>>
>> Signed-off-by: Jaewon Kim <[email protected]>
>> ---
>> drivers/spi/spi-s3c64xx.c | 8 ++++++++
>> 1 file changed, 8 insertions(+)
>>
>> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
>> index 886722fb40ea..cf3060b2639b 100644
>> --- a/drivers/spi/spi-s3c64xx.c
>> +++ b/drivers/spi/spi-s3c64xx.c
>> @@ -561,6 +561,14 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
>> u32 cpy_len;
>> u8 *buf;
>> int ms;
>> + u32 tx_time;
>> +
>> + /* sleep during signal transfer time */
>> + status = readl(regs + S3C64XX_SPI_STATUS);
>> + if (RX_FIFO_LVL(status, sdd) < xfer->len) {
>> + tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;
>> + usleep_range(tx_time / 2, tx_time);
>> + }
> Did you actually check the delays introduced by it? Is it worth?

Yes, I already test it.

Throughput was the same, CPU utilization decreased to 30~40% from 100%.

Tested board is ExynosAutov9 SADK.


>
>>
>> /* millisecs to xfer 'len' bytes @ 'cur_speed' */
>> ms = xfer->len * 8 * 1000 / sdd->cur_speed;
> You have now some code duplication so this could be combined.
>
> Best regards,
> Krzysztof
>
>
Thanks

Jaewon Kim

2023-04-19 09:58:11

by Jaewon Kim

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] spi: s3c64xx: support interrupt based pio mode


On 23. 4. 19. 17:21, Krzysztof Kozlowski wrote:
> On 19/04/2023 08:06, Jaewon Kim wrote:
>> Interrupt based pio mode is supported to reduce CPU load.
>> If transfer size is larger than 32 byte, it is processed using interrupt.
>>
>> Signed-off-by: Jaewon Kim <[email protected]>
>> ---
>> drivers/spi/spi-s3c64xx.c | 82 ++++++++++++++++++++++++++++++++-------
>> 1 file changed, 67 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
>> index cf3060b2639b..ce1afb9a4ed4 100644
>> --- a/drivers/spi/spi-s3c64xx.c
>> +++ b/drivers/spi/spi-s3c64xx.c
>> @@ -58,6 +58,8 @@
>> #define S3C64XX_SPI_MODE_BUS_TSZ_HALFWORD (1<<17)
>> #define S3C64XX_SPI_MODE_BUS_TSZ_WORD (2<<17)
>> #define S3C64XX_SPI_MODE_BUS_TSZ_MASK (3<<17)
>> +#define S3C64XX_SPI_MODE_RX_RDY_LVL GENMASK(16, 11)
>> +#define S3C64XX_SPI_MODE_RX_RDY_LVL_SHIFT 11
>> #define S3C64XX_SPI_MODE_SELF_LOOPBACK (1<<3)
>> #define S3C64XX_SPI_MODE_RXDMA_ON (1<<2)
>> #define S3C64XX_SPI_MODE_TXDMA_ON (1<<1)
>> @@ -114,6 +116,8 @@
>>
>> #define S3C64XX_SPI_TRAILCNT S3C64XX_SPI_MAX_TRAILCNT
>>
>> +#define S3C64XX_SPI_POLLING_SIZE 32
>> +
>> #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
>> #define is_polling(x) (x->cntrlr_info->polling)
>>
>> @@ -552,10 +556,11 @@ static int s3c64xx_wait_for_dma(struct s3c64xx_spi_driver_data *sdd,
>> }
>>
>> static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
>> - struct spi_transfer *xfer)
>> + struct spi_transfer *xfer, int use_irq)
>> {
>> void __iomem *regs = sdd->regs;
>> unsigned long val;
>> + unsigned long time;
>> u32 status;
>> int loops;
>> u32 cpy_len;
>> @@ -563,17 +568,24 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
>> int ms;
>> u32 tx_time;
>>
>> - /* sleep during signal transfer time */
>> - status = readl(regs + S3C64XX_SPI_STATUS);
>> - if (RX_FIFO_LVL(status, sdd) < xfer->len) {
>> - tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;
>> - usleep_range(tx_time / 2, tx_time);
>> - }
> You just added this code. Adding and immediately removing it, suggests
> this should be one patch.
>
This code has been moved, not removed.


+       if (use_irq) {
+               val = msecs_to_jiffies(ms);
+               time =
wait_for_completion_timeout(&sdd->xfer_completion, val);
+               if (!time)
+                       return -EIO;
+       } else {
+               /* sleep during signal transfer time */
+               status = readl(regs + S3C64XX_SPI_STATUS);
+               if (RX_FIFO_LVL(status, sdd) < xfer->len) {
+                       tx_time = (xfer->len * 8 * 1000 * 1000) /
sdd->cur_speed;
+                       usleep_range(tx_time / 2, tx_time);
+               }
+       }
+


> Best regards,
> Krzysztof
>
>

Thanks

Jaewon Kim

2023-04-19 11:31:38

by Jaewon Kim

[permalink] [raw]
Subject: Re: [PATCH v2 2/4] spi: s3c64xx: add cpu_relax in polling loop


On 23. 4. 19. 17:14, Krzysztof Kozlowski wrote:
> On 19/04/2023 08:06, Jaewon Kim wrote:
>> Adds cpu_relax() to prevent long busy-wait.
> How cpu_relax prevents long waiting?

As I know, cpu_relax() can be converted to yield. This can prevent
excessive use of the CPU in busy-loop.

I'll replace poor sentence like below in v3.

("Adds cpu_relax() to allow CPU relaxation in busy-loop")

>> There is busy-wait loop to check data transfer completion in polling mode.
>>
>> Signed-off-by: Jaewon Kim<[email protected]>
>> ---
>> drivers/spi/spi-s3c64xx.c | 1 +
>> 1 file changed, 1 insertion(+)
>>
>> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
>> index 273aa02322d9..886722fb40ea 100644
>> --- a/drivers/spi/spi-s3c64xx.c
>> +++ b/drivers/spi/spi-s3c64xx.c
>> @@ -568,6 +568,7 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
>>
>> val = msecs_to_loops(ms);
>> do {
>> + cpu_relax();
> Shouldn't this be just readl_poll_timeout()? Or the syntax would be too
> complicated?

I think we can replace this while() loop to readl_poll_timeout().

However, we should use 0 value as 'delay_us' parameter. Because delay
can affect throughput.


My purpose is add relax to this busy-loop.

we cannot give relax if we change to readl_poll_timeout().


>> status = readl(regs + S3C64XX_SPI_STATUS);
>> } while (RX_FIFO_LVL(status, sdd) < xfer->len && --val);
>>
> Best regards,
> Krzysztof
>
>
Thanks

Jaewon Kim

2023-04-19 15:56:55

by Andi Shyti

[permalink] [raw]
Subject: Re: [PATCH v2 1/4] spi: s3c64xx: changed to PIO mode if there is no DMA

Hi Jaewon,

On Wed, Apr 19, 2023 at 03:06:36PM +0900, Jaewon Kim wrote:
> Polling mode supported with qurik if there was no DMA in the SOC.

I think you want to say here that "Through quirks we choose to
use polling mode whenever there is no DMA in the SoC".

> However, there are cased where we cannot or do not want to use DMA.

/cased/cases/

> To support this case, if DMA is not set, it is switched to polling mode.

You haven't really described what you are doing here... you could
just write something like: "Use DTS properties to select wether
to use polling or DMA mode."

Side note, please use the imperative form when you want to
describe what you have done to fix the issue.

> Signed-off-by: Jaewon Kim <[email protected]>
> ---
> drivers/spi/spi-s3c64xx.c | 8 ++++++--
> include/linux/platform_data/spi-s3c64xx.h | 1 +
> 2 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
> index 71d324ec9a70..273aa02322d9 100644
> --- a/drivers/spi/spi-s3c64xx.c
> +++ b/drivers/spi/spi-s3c64xx.c
> @@ -19,7 +19,6 @@
> #include <linux/platform_data/spi-s3c64xx.h>
>
> #define MAX_SPI_PORTS 12
> -#define S3C64XX_SPI_QUIRK_POLL (1 << 0)
> #define S3C64XX_SPI_QUIRK_CS_AUTO (1 << 1)
> #define AUTOSUSPEND_TIMEOUT 2000
>
> @@ -116,7 +115,7 @@
> #define S3C64XX_SPI_TRAILCNT S3C64XX_SPI_MAX_TRAILCNT
>
> #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
> -#define is_polling(x) (x->port_conf->quirks & S3C64XX_SPI_QUIRK_POLL)
> +#define is_polling(x) (x->cntrlr_info->polling)
>
> #define RXBUSY (1<<2)
> #define TXBUSY (1<<3)
> @@ -1067,6 +1066,11 @@ static struct s3c64xx_spi_info *s3c64xx_spi_parse_dt(struct device *dev)
> sci->num_cs = temp;
> }
>
> + if (!of_find_property(dev->of_node, "dmas", NULL)) {
> + dev_warn(dev, "cannot find DMA, changed to PIO mode\n");
> + sci->polling = 1;

sci->polling = true;

But it could be even better:

sci->polling = !of_find_property(dev->of_node, "dmas", NULL));

and you get rid of the dev_warn() that is not required.

Andi


Attachments:
(No filename) (2.12 kB)
signature.asc (235.00 B)
Download all attachments

2023-04-19 16:06:42

by Andi Shyti

[permalink] [raw]
Subject: Re: [PATCH v2 3/4] spi: s3c64xx: add sleep during transfer

Hi Jaewon,

> >> In polling mode, the status register is constantly read to check transfer
> >> completion. It cause excessive CPU usage.
> >> So, it calculates the SPI transfer time and made it sleep.
> >>
> >> Signed-off-by: Jaewon Kim <[email protected]>
> >> ---
> >> drivers/spi/spi-s3c64xx.c | 8 ++++++++
> >> 1 file changed, 8 insertions(+)
> >>
> >> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
> >> index 886722fb40ea..cf3060b2639b 100644
> >> --- a/drivers/spi/spi-s3c64xx.c
> >> +++ b/drivers/spi/spi-s3c64xx.c
> >> @@ -561,6 +561,14 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
> >> u32 cpy_len;
> >> u8 *buf;
> >> int ms;
> >> + u32 tx_time;
> >> +
> >> + /* sleep during signal transfer time */
> >> + status = readl(regs + S3C64XX_SPI_STATUS);
> >> + if (RX_FIFO_LVL(status, sdd) < xfer->len) {
> >> + tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;
> >> + usleep_range(tx_time / 2, tx_time);
> >> + }
> > Did you actually check the delays introduced by it? Is it worth?
>
> Yes, I already test it.
>
> Throughput was the same, CPU utilization decreased to 30~40% from 100%.
>
> Tested board is ExynosAutov9 SADK.
>
>
> >
> >>
> >> /* millisecs to xfer 'len' bytes @ 'cur_speed' */
> >> ms = xfer->len * 8 * 1000 / sdd->cur_speed;
> > You have now some code duplication so this could be combined.

you could put the 'if' under the 'ms = ...' and just use ms
without declaring any tx_time.

Andi


Attachments:
(No filename) (1.51 kB)
signature.asc (235.00 B)
Download all attachments

2023-04-19 16:12:55

by Andi Shyti

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] spi: s3c64xx: support interrupt based pio mode

Hi Jaewon,

On Wed, Apr 19, 2023 at 03:06:39PM +0900, Jaewon Kim wrote:
> Interrupt based pio mode is supported to reduce CPU load.
> If transfer size is larger than 32 byte, it is processed using interrupt.
>
> Signed-off-by: Jaewon Kim <[email protected]>
> ---
> drivers/spi/spi-s3c64xx.c | 82 ++++++++++++++++++++++++++++++++-------
> 1 file changed, 67 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
> index cf3060b2639b..ce1afb9a4ed4 100644
> --- a/drivers/spi/spi-s3c64xx.c
> +++ b/drivers/spi/spi-s3c64xx.c
> @@ -58,6 +58,8 @@
> #define S3C64XX_SPI_MODE_BUS_TSZ_HALFWORD (1<<17)
> #define S3C64XX_SPI_MODE_BUS_TSZ_WORD (2<<17)
> #define S3C64XX_SPI_MODE_BUS_TSZ_MASK (3<<17)
> +#define S3C64XX_SPI_MODE_RX_RDY_LVL GENMASK(16, 11)
> +#define S3C64XX_SPI_MODE_RX_RDY_LVL_SHIFT 11
> #define S3C64XX_SPI_MODE_SELF_LOOPBACK (1<<3)
> #define S3C64XX_SPI_MODE_RXDMA_ON (1<<2)
> #define S3C64XX_SPI_MODE_TXDMA_ON (1<<1)
> @@ -114,6 +116,8 @@
>
> #define S3C64XX_SPI_TRAILCNT S3C64XX_SPI_MAX_TRAILCNT
>
> +#define S3C64XX_SPI_POLLING_SIZE 32
> +
> #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
> #define is_polling(x) (x->cntrlr_info->polling)
>
> @@ -552,10 +556,11 @@ static int s3c64xx_wait_for_dma(struct s3c64xx_spi_driver_data *sdd,
> }
>
> static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
> - struct spi_transfer *xfer)
> + struct spi_transfer *xfer, int use_irq)

bool use_irq

> {
> void __iomem *regs = sdd->regs;
> unsigned long val;
> + unsigned long time;

this time is used only in "if (use_irq)" can you move its
declaration under the if?

> u32 status;
> int loops;
> u32 cpy_len;
> @@ -563,17 +568,24 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
> int ms;
> u32 tx_time;
>
> - /* sleep during signal transfer time */
> - status = readl(regs + S3C64XX_SPI_STATUS);
> - if (RX_FIFO_LVL(status, sdd) < xfer->len) {
> - tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;
> - usleep_range(tx_time / 2, tx_time);
> - }
> -
> /* millisecs to xfer 'len' bytes @ 'cur_speed' */
> ms = xfer->len * 8 * 1000 / sdd->cur_speed;
> ms += 10; /* some tolerance */
>
> + if (use_irq) {
> + val = msecs_to_jiffies(ms);
> + time = wait_for_completion_timeout(&sdd->xfer_completion, val);
> + if (!time)
> + return -EIO;
> + } else {
> + /* sleep during signal transfer time */
> + status = readl(regs + S3C64XX_SPI_STATUS);
> + if (RX_FIFO_LVL(status, sdd) < xfer->len) {
> + tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;
> + usleep_range(tx_time / 2, tx_time);

yeah... just use 'ms'.

> + }
> + }
> +
> val = msecs_to_loops(ms);
> do {
> cpu_relax();
> @@ -737,10 +749,13 @@ static int s3c64xx_spi_transfer_one(struct spi_master *master,
> void *rx_buf = NULL;
> int target_len = 0, origin_len = 0;
> int use_dma = 0;
> + int use_irq = 0;
> int status;
> u32 speed;
> u8 bpw;
> unsigned long flags;
> + u32 rdy_lv;
> + u32 val;
>
> reinit_completion(&sdd->xfer_completion);
>
> @@ -761,17 +776,46 @@ static int s3c64xx_spi_transfer_one(struct spi_master *master,
> sdd->rx_dma.ch && sdd->tx_dma.ch) {
> use_dma = 1;
>
> - } else if (xfer->len > fifo_len) {
> + } else if (xfer->len >= fifo_len) {
> tx_buf = xfer->tx_buf;
> rx_buf = xfer->rx_buf;
> origin_len = xfer->len;
> -
> target_len = xfer->len;
> - if (xfer->len > fifo_len)
> - xfer->len = fifo_len;
> + xfer->len = fifo_len - 1;
> }

Is this change related to this patch?

The rest looks good.

Andi


Attachments:
(No filename) (3.67 kB)
signature.asc (235.00 B)
Download all attachments

2023-04-20 15:41:49

by Krzysztof Kozlowski

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] spi: s3c64xx: support interrupt based pio mode

On 19/04/2023 11:45, Jaewon Kim wrote:
>>> static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
>>> - struct spi_transfer *xfer)
>>> + struct spi_transfer *xfer, int use_irq)
>>> {
>>> void __iomem *regs = sdd->regs;
>>> unsigned long val;
>>> + unsigned long time;
>>> u32 status;
>>> int loops;
>>> u32 cpy_len;
>>> @@ -563,17 +568,24 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
>>> int ms;
>>> u32 tx_time;
>>>
>>> - /* sleep during signal transfer time */
>>> - status = readl(regs + S3C64XX_SPI_STATUS);
>>> - if (RX_FIFO_LVL(status, sdd) < xfer->len) {
>>> - tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;
>>> - usleep_range(tx_time / 2, tx_time);
>>> - }
>> You just added this code. Adding and immediately removing it, suggests
>> this should be one patch.
>>
> This code has been moved, not removed.

Move consists of remove and add. Add it in correct place since beginning.

Best regards,
Krzysztof

2023-04-20 15:41:54

by Krzysztof Kozlowski

[permalink] [raw]
Subject: Re: [PATCH v2 2/4] spi: s3c64xx: add cpu_relax in polling loop

On 19/04/2023 13:13, Jaewon Kim wrote:
>
> On 23. 4. 19. 17:14, Krzysztof Kozlowski wrote:
>> On 19/04/2023 08:06, Jaewon Kim wrote:
>>> Adds cpu_relax() to prevent long busy-wait.
>> How cpu_relax prevents long waiting?
>
> As I know, cpu_relax() can be converted to yield. This can prevent
> excessive use of the CPU in busy-loop.

That's ok, you just wrote that it will prevent long waiting, so I assume
it will shorten the wait time.

>
> I'll replace poor sentence like below in v3.
>
> ("Adds cpu_relax() to allow CPU relaxation in busy-loop")
>
>>> There is busy-wait loop to check data transfer completion in polling mode.
>>>
>>> Signed-off-by: Jaewon Kim<[email protected]>
>>> ---
>>> drivers/spi/spi-s3c64xx.c | 1 +
>>> 1 file changed, 1 insertion(+)
>>>
>>> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
>>> index 273aa02322d9..886722fb40ea 100644
>>> --- a/drivers/spi/spi-s3c64xx.c
>>> +++ b/drivers/spi/spi-s3c64xx.c
>>> @@ -568,6 +568,7 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
>>>
>>> val = msecs_to_loops(ms);
>>> do {
>>> + cpu_relax();
>> Shouldn't this be just readl_poll_timeout()? Or the syntax would be too
>> complicated?
>
> I think we can replace this while() loop to readl_poll_timeout().
>
> However, we should use 0 value as 'delay_us' parameter. Because delay
> can affect throughput.
>
>
> My purpose is add relax to this busy-loop.
>
> we cannot give relax if we change to readl_poll_timeout().

readl_poll_timeout() will know to do the best. You do not need to add
cpu_relax there.


Best regards,
Krzysztof

2023-04-20 15:42:22

by Krzysztof Kozlowski

[permalink] [raw]
Subject: Re: [PATCH v2 1/4] spi: s3c64xx: changed to PIO mode if there is no DMA

On 19/04/2023 08:06, Jaewon Kim wrote:
> Polling mode supported with qurik if there was no DMA in the SOC.
> However, there are cased where we cannot or do not want to use DMA.
> To support this case, if DMA is not set, it is switched to polling mode.
>

(...)

> #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
> -#define is_polling(x) (x->port_conf->quirks & S3C64XX_SPI_QUIRK_POLL)
> +#define is_polling(x) (x->cntrlr_info->polling)
>
> #define RXBUSY (1<<2)
> #define TXBUSY (1<<3)
> @@ -1067,6 +1066,11 @@ static struct s3c64xx_spi_info *s3c64xx_spi_parse_dt(struct device *dev)
> sci->num_cs = temp;
> }
>
> + if (!of_find_property(dev->of_node, "dmas", NULL)) {

of_property_present()


Best regards,
Krzysztof

2023-04-21 01:48:36

by Jaewon Kim

[permalink] [raw]
Subject: Re: [PATCH v2 1/4] spi: s3c64xx: changed to PIO mode if there is no DMA

Hi Andi,


On 23. 4. 20. 00:46, Andi Shyti wrote:
> Hi Jaewon,
>
> On Wed, Apr 19, 2023 at 03:06:36PM +0900, Jaewon Kim wrote:
>> Polling mode supported with qurik if there was no DMA in the SOC.
> I think you want to say here that "Through quirks we choose to
> use polling mode whenever there is no DMA in the SoC".
>
>> However, there are cased where we cannot or do not want to use DMA.
> /cased/cases/
>
>> To support this case, if DMA is not set, it is switched to polling mode.
> You haven't really described what you are doing here... you could
> just write something like: "Use DTS properties to select wether
> to use polling or DMA mode."
>
> Side note, please use the imperative form when you want to
> describe what you have done to fix the issue.


Thanks for guide.

I will change description in v3.


>
>> Signed-off-by: Jaewon Kim <[email protected]>
>> ---
>> drivers/spi/spi-s3c64xx.c | 8 ++++++--
>> include/linux/platform_data/spi-s3c64xx.h | 1 +
>> 2 files changed, 7 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
>> index 71d324ec9a70..273aa02322d9 100644
>> --- a/drivers/spi/spi-s3c64xx.c
>> +++ b/drivers/spi/spi-s3c64xx.c
>> @@ -19,7 +19,6 @@
>> #include <linux/platform_data/spi-s3c64xx.h>
>>
>> #define MAX_SPI_PORTS 12
>> -#define S3C64XX_SPI_QUIRK_POLL (1 << 0)
>> #define S3C64XX_SPI_QUIRK_CS_AUTO (1 << 1)
>> #define AUTOSUSPEND_TIMEOUT 2000
>>
>> @@ -116,7 +115,7 @@
>> #define S3C64XX_SPI_TRAILCNT S3C64XX_SPI_MAX_TRAILCNT
>>
>> #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
>> -#define is_polling(x) (x->port_conf->quirks & S3C64XX_SPI_QUIRK_POLL)
>> +#define is_polling(x) (x->cntrlr_info->polling)
>>
>> #define RXBUSY (1<<2)
>> #define TXBUSY (1<<3)
>> @@ -1067,6 +1066,11 @@ static struct s3c64xx_spi_info *s3c64xx_spi_parse_dt(struct device *dev)
>> sci->num_cs = temp;
>> }
>>
>> + if (!of_find_property(dev->of_node, "dmas", NULL)) {
>> + dev_warn(dev, "cannot find DMA, changed to PIO mode\n");
>> + sci->polling = 1;
> sci->polling = true;
>
> But it could be even better:
>
> sci->polling = !of_find_property(dev->of_node, "dmas", NULL));
>
> and you get rid of the dev_warn() that is not required.
>
> Andi


Okay, I will change 1 to 'true'..


Thanks

Jaewon Kim

2023-04-21 01:52:49

by Jaewon Kim

[permalink] [raw]
Subject: Re: [PATCH v2 2/4] spi: s3c64xx: add cpu_relax in polling loop


On 23. 4. 21. 00:39, Krzysztof Kozlowski wrote:
> On 19/04/2023 13:13, Jaewon Kim wrote:
>> On 23. 4. 19. 17:14, Krzysztof Kozlowski wrote:
>>> On 19/04/2023 08:06, Jaewon Kim wrote:
>>>> Adds cpu_relax() to prevent long busy-wait.
>>> How cpu_relax prevents long waiting?
>> As I know, cpu_relax() can be converted to yield. This can prevent
>> excessive use of the CPU in busy-loop.
> That's ok, you just wrote that it will prevent long waiting, so I assume
> it will shorten the wait time.
>
>> I'll replace poor sentence like below in v3.
>>
>> ("Adds cpu_relax() to allow CPU relaxation in busy-loop")
>>
>>>> There is busy-wait loop to check data transfer completion in polling mode.
>>>>
>>>> Signed-off-by: Jaewon Kim<[email protected]>
>>>> ---
>>>> drivers/spi/spi-s3c64xx.c | 1 +
>>>> 1 file changed, 1 insertion(+)
>>>>
>>>> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
>>>> index 273aa02322d9..886722fb40ea 100644
>>>> --- a/drivers/spi/spi-s3c64xx.c
>>>> +++ b/drivers/spi/spi-s3c64xx.c
>>>> @@ -568,6 +568,7 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
>>>>
>>>> val = msecs_to_loops(ms);
>>>> do {
>>>> + cpu_relax();
>>> Shouldn't this be just readl_poll_timeout()? Or the syntax would be too
>>> complicated?
>> I think we can replace this while() loop to readl_poll_timeout().
>>
>> However, we should use 0 value as 'delay_us' parameter. Because delay
>> can affect throughput.
>>
>>
>> My purpose is add relax to this busy-loop.
>>
>> we cannot give relax if we change to readl_poll_timeout().
> readl_poll_timeout() will know to do the best. You do not need to add
> cpu_relax there.
Okay, I will change it to readl_poll_timeout()
>
> Best regards,
> Krzysztof
>
>

Thanks

Jaewon Kim

2023-04-21 01:53:01

by Jaewon Kim

[permalink] [raw]
Subject: Re: [PATCH v2 1/4] spi: s3c64xx: changed to PIO mode if there is no DMA


On 23. 4. 21. 00:40, Krzysztof Kozlowski wrote:
> On 19/04/2023 08:06, Jaewon Kim wrote:
>> Polling mode supported with qurik if there was no DMA in the SOC.
>> However, there are cased where we cannot or do not want to use DMA.
>> To support this case, if DMA is not set, it is switched to polling mode.
>>
> (...)
>
>> #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
>> -#define is_polling(x) (x->port_conf->quirks & S3C64XX_SPI_QUIRK_POLL)
>> +#define is_polling(x) (x->cntrlr_info->polling)
>>
>> #define RXBUSY (1<<2)
>> #define TXBUSY (1<<3)
>> @@ -1067,6 +1066,11 @@ static struct s3c64xx_spi_info *s3c64xx_spi_parse_dt(struct device *dev)
>> sci->num_cs = temp;
>> }
>>
>> + if (!of_find_property(dev->of_node, "dmas", NULL)) {
> of_property_present()

I will change it to of_property_present().


>
> Best regards,
> Krzysztof
>
>
Thanks

Jaewon Kim

2023-04-21 03:11:38

by Jaewon Kim

[permalink] [raw]
Subject: Re: [PATCH v2 3/4] spi: s3c64xx: add sleep during transfer

Hi Andi,


On 23. 4. 20. 00:56, Andi Shyti wrote:
> Hi Jaewon,
>
>>>> In polling mode, the status register is constantly read to check transfer
>>>> completion. It cause excessive CPU usage.
>>>> So, it calculates the SPI transfer time and made it sleep.
>>>>
>>>> Signed-off-by: Jaewon Kim <[email protected]>
>>>> ---
>>>> drivers/spi/spi-s3c64xx.c | 8 ++++++++
>>>> 1 file changed, 8 insertions(+)
>>>>
>>>> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
>>>> index 886722fb40ea..cf3060b2639b 100644
>>>> --- a/drivers/spi/spi-s3c64xx.c
>>>> +++ b/drivers/spi/spi-s3c64xx.c
>>>> @@ -561,6 +561,14 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
>>>> u32 cpy_len;
>>>> u8 *buf;
>>>> int ms;
>>>> + u32 tx_time;
>>>> +
>>>> + /* sleep during signal transfer time */
>>>> + status = readl(regs + S3C64XX_SPI_STATUS);
>>>> + if (RX_FIFO_LVL(status, sdd) < xfer->len) {
>>>> + tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;
>>>> + usleep_range(tx_time / 2, tx_time);
>>>> + }
>>> Did you actually check the delays introduced by it? Is it worth?
>> Yes, I already test it.
>>
>> Throughput was the same, CPU utilization decreased to 30~40% from 100%.
>>
>> Tested board is ExynosAutov9 SADK.
>>
>>
>>>>
>>>> /* millisecs to xfer 'len' bytes @ 'cur_speed' */
>>>> ms = xfer->len * 8 * 1000 / sdd->cur_speed;
>>> You have now some code duplication so this could be combined.
> you could put the 'if' under the 'ms = ...' and just use ms
> without declaring any tx_time.
>
> Andi


The unit of 'tx_time' is 'us'.


tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;

ms = xfer->len * 8 * 1000 / sdd->cur_speed;


I add tx_time to minimize existing code modifications.

If we are not using tx_time, we need to change ms to us and change the
related code.


Thanks

Jaewon Kim


2023-04-21 03:12:09

by Jaewon Kim

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] spi: s3c64xx: support interrupt based pio mode

Hi Andy,


On 23. 4. 20. 01:03, Andi Shyti wrote:
> Hi Jaewon,
>
> On Wed, Apr 19, 2023 at 03:06:39PM +0900, Jaewon Kim wrote:
>> Interrupt based pio mode is supported to reduce CPU load.
>> If transfer size is larger than 32 byte, it is processed using interrupt.
>>
>> Signed-off-by: Jaewon Kim <[email protected]>
>> ---
>> drivers/spi/spi-s3c64xx.c | 82 ++++++++++++++++++++++++++++++++-------
>> 1 file changed, 67 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
>> index cf3060b2639b..ce1afb9a4ed4 100644
>> --- a/drivers/spi/spi-s3c64xx.c
>> +++ b/drivers/spi/spi-s3c64xx.c
>> @@ -58,6 +58,8 @@
>> #define S3C64XX_SPI_MODE_BUS_TSZ_HALFWORD (1<<17)
>> #define S3C64XX_SPI_MODE_BUS_TSZ_WORD (2<<17)
>> #define S3C64XX_SPI_MODE_BUS_TSZ_MASK (3<<17)
>> +#define S3C64XX_SPI_MODE_RX_RDY_LVL GENMASK(16, 11)
>> +#define S3C64XX_SPI_MODE_RX_RDY_LVL_SHIFT 11
>> #define S3C64XX_SPI_MODE_SELF_LOOPBACK (1<<3)
>> #define S3C64XX_SPI_MODE_RXDMA_ON (1<<2)
>> #define S3C64XX_SPI_MODE_TXDMA_ON (1<<1)
>> @@ -114,6 +116,8 @@
>>
>> #define S3C64XX_SPI_TRAILCNT S3C64XX_SPI_MAX_TRAILCNT
>>
>> +#define S3C64XX_SPI_POLLING_SIZE 32
>> +
>> #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
>> #define is_polling(x) (x->cntrlr_info->polling)
>>
>> @@ -552,10 +556,11 @@ static int s3c64xx_wait_for_dma(struct s3c64xx_spi_driver_data *sdd,
>> }
>>
>> static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
>> - struct spi_transfer *xfer)
>> + struct spi_transfer *xfer, int use_irq)
> bool use_irq

Okay, I will change it to bool.

>
>> {
>> void __iomem *regs = sdd->regs;
>> unsigned long val;
>> + unsigned long time;
> this time is used only in "if (use_irq)" can you move its
> declaration under the if?
>
>> u32 status;
>> int loops;
>> u32 cpy_len;
>> @@ -563,17 +568,24 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
>> int ms;
>> u32 tx_time;
>>
>> - /* sleep during signal transfer time */
>> - status = readl(regs + S3C64XX_SPI_STATUS);
>> - if (RX_FIFO_LVL(status, sdd) < xfer->len) {
>> - tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;
>> - usleep_range(tx_time / 2, tx_time);
>> - }
>> -
>> /* millisecs to xfer 'len' bytes @ 'cur_speed' */
>> ms = xfer->len * 8 * 1000 / sdd->cur_speed;
>> ms += 10; /* some tolerance */
>>
>> + if (use_irq) {
>> + val = msecs_to_jiffies(ms);
>> + time = wait_for_completion_timeout(&sdd->xfer_completion, val);
>> + if (!time)
>> + return -EIO;
>> + } else {
>> + /* sleep during signal transfer time */
>> + status = readl(regs + S3C64XX_SPI_STATUS);
>> + if (RX_FIFO_LVL(status, sdd) < xfer->len) {
>> + tx_time = (xfer->len * 8 * 1000 * 1000) / sdd->cur_speed;
>> + usleep_range(tx_time / 2, tx_time);
> yeah... just use 'ms'.
As I mentioned in the previous mail, the unit of tx_time is us.
>
>> + }
>> + }
>> +
>> val = msecs_to_loops(ms);
>> do {
>> cpu_relax();
>> @@ -737,10 +749,13 @@ static int s3c64xx_spi_transfer_one(struct spi_master *master,
>> void *rx_buf = NULL;
>> int target_len = 0, origin_len = 0;
>> int use_dma = 0;
>> + int use_irq = 0;
>> int status;
>> u32 speed;
>> u8 bpw;
>> unsigned long flags;
>> + u32 rdy_lv;
>> + u32 val;
>>
>> reinit_completion(&sdd->xfer_completion);
>>
>> @@ -761,17 +776,46 @@ static int s3c64xx_spi_transfer_one(struct spi_master *master,
>> sdd->rx_dma.ch && sdd->tx_dma.ch) {
>> use_dma = 1;
>>
>> - } else if (xfer->len > fifo_len) {
>> + } else if (xfer->len >= fifo_len) {
>> tx_buf = xfer->tx_buf;
>> rx_buf = xfer->rx_buf;
>> origin_len = xfer->len;
>> -
>> target_len = xfer->len;
>> - if (xfer->len > fifo_len)
>> - xfer->len = fifo_len;
>> + xfer->len = fifo_len - 1;
>> }
> Is this change related to this patch?

Yes, it is related to this patch.

If data is filled as much as the size of FIFO, underrun/overrun IRQ occurs.

In CPU polling mode, it did not occur because the FIFO was read before
the IRQ was set.

So, I set xfer->len to fifo_len-1.

>
> The rest looks good.
>
> Andi


Thanks

Jaewon Kim