There's a bunch of overhead in spi-geni-qcom's prepare_message. Get
rid of it. Before this change spi_geni_prepare_message() took around
14.5 us. After this change, spi_geni_prepare_message() takes about
1.75 us (as measured by ftrace).
What's here:
* We're always in FIFO mode, so there's no need to call
geni_se_select_mode() for every message. Select the mode once in
spi_geni_init() instead. This avoids a whole ton of readl/writel calls.
* We don't need to write a whole pile of config registers if the mode
isn't changing. Cache the last mode and only do the work if needed.
* For several registers we were trying to do read/modify/write, but
there was no reason. The registers only have one thing in them, so
just write them.
Signed-off-by: Douglas Anderson <[email protected]>
---
drivers/spi/spi-geni-qcom.c | 54 +++++++++++++++++--------------------
1 file changed, 24 insertions(+), 30 deletions(-)
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index f51279608fc7..97fac5ea6afd 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -77,6 +77,7 @@ struct spi_geni_master {
u32 tx_fifo_depth;
u32 fifo_width_bits;
u32 tx_wm;
+ u32 last_mode;
unsigned long cur_speed_hz;
unsigned int cur_bits_per_word;
unsigned int tx_rem_bytes;
@@ -177,8 +178,6 @@ static void spi_setup_word_len(struct spi_geni_master *mas, u16 mode,
struct geni_se *se = &mas->se;
u32 word_len;
- word_len = readl(se->base + SE_SPI_WORD_LEN);
-
/*
* If bits_per_word isn't a byte aligned value, set the packing to be
* 1 SPI word per FIFO word.
@@ -187,10 +186,9 @@ static void spi_setup_word_len(struct spi_geni_master *mas, u16 mode,
pack_words = mas->fifo_width_bits / bits_per_word;
else
pack_words = 1;
- word_len &= ~WORD_LEN_MSK;
- word_len |= ((bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK);
geni_se_config_packing(&mas->se, bits_per_word, pack_words, msb_first,
true, true);
+ word_len = (bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK;
writel(word_len, se->base + SE_SPI_WORD_LEN);
}
@@ -238,38 +236,34 @@ static int setup_fifo_params(struct spi_device *spi_slv,
{
struct spi_geni_master *mas = spi_master_get_devdata(spi);
struct geni_se *se = &mas->se;
- u32 loopback_cfg, cpol, cpha, demux_output_inv;
+ u32 loopback_cfg = 0, cpol = 0, cpha = 0, demux_output_inv = 0;
u32 demux_sel;
- loopback_cfg = readl(se->base + SE_SPI_LOOPBACK);
- cpol = readl(se->base + SE_SPI_CPOL);
- cpha = readl(se->base + SE_SPI_CPHA);
- demux_output_inv = 0;
- loopback_cfg &= ~LOOPBACK_MSK;
- cpol &= ~CPOL;
- cpha &= ~CPHA;
+ if (mas->last_mode != spi_slv->mode) {
+ if (spi_slv->mode & SPI_LOOP)
+ loopback_cfg = LOOPBACK_ENABLE;
- if (spi_slv->mode & SPI_LOOP)
- loopback_cfg |= LOOPBACK_ENABLE;
+ if (spi_slv->mode & SPI_CPOL)
+ cpol = CPOL;
- if (spi_slv->mode & SPI_CPOL)
- cpol |= CPOL;
+ if (spi_slv->mode & SPI_CPHA)
+ cpha = CPHA;
- if (spi_slv->mode & SPI_CPHA)
- cpha |= CPHA;
+ if (spi_slv->mode & SPI_CS_HIGH)
+ demux_output_inv = BIT(spi_slv->chip_select);
- if (spi_slv->mode & SPI_CS_HIGH)
- demux_output_inv = BIT(spi_slv->chip_select);
+ demux_sel = spi_slv->chip_select;
+ mas->cur_bits_per_word = spi_slv->bits_per_word;
- demux_sel = spi_slv->chip_select;
- mas->cur_bits_per_word = spi_slv->bits_per_word;
+ spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word);
+ writel(loopback_cfg, se->base + SE_SPI_LOOPBACK);
+ writel(demux_sel, se->base + SE_SPI_DEMUX_SEL);
+ writel(cpha, se->base + SE_SPI_CPHA);
+ writel(cpol, se->base + SE_SPI_CPOL);
+ writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV);
- spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word);
- writel(loopback_cfg, se->base + SE_SPI_LOOPBACK);
- writel(demux_sel, se->base + SE_SPI_DEMUX_SEL);
- writel(cpha, se->base + SE_SPI_CPHA);
- writel(cpol, se->base + SE_SPI_CPOL);
- writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV);
+ mas->last_mode = spi_slv->mode;
+ }
return geni_spi_set_clock_and_bw(mas, spi_slv->max_speed_hz);
}
@@ -279,9 +273,7 @@ static int spi_geni_prepare_message(struct spi_master *spi,
{
int ret;
struct spi_geni_master *mas = spi_master_get_devdata(spi);
- struct geni_se *se = &mas->se;
- geni_se_select_mode(se, GENI_SE_FIFO);
ret = setup_fifo_params(spi_msg->spi, spi);
if (ret)
dev_err(mas->dev, "Couldn't select mode %d\n", ret);
@@ -322,6 +314,8 @@ static int spi_geni_init(struct spi_geni_master *mas)
else
mas->oversampling = 1;
+ geni_se_select_mode(se, GENI_SE_FIFO);
+
pm_runtime_put(mas->dev);
return 0;
}
--
2.27.0.383.g050319c2ae-goog
On 7/2/2020 6:15 AM, Douglas Anderson wrote:
> There's a bunch of overhead in spi-geni-qcom's prepare_message. Get
> rid of it. Before this change spi_geni_prepare_message() took around
> 14.5 us. After this change, spi_geni_prepare_message() takes about
> 1.75 us (as measured by ftrace).
>
> What's here:
> * We're always in FIFO mode, so no need to call it for every transfer.
> This avoids a whole ton of readl/writel calls.
> * We don't need to write a whole pile of config registers if the mode
> isn't changing. Cache the last mode and only do the work if needed.
> * For several registers we were trying to do read/modify/write, but
> there was no reason. The registers only have one thing in them, so
> just write them.
>
> Signed-off-by: Douglas Anderson <[email protected]>
> ---
>
> drivers/spi/spi-geni-qcom.c | 54 +++++++++++++++++--------------------
> 1 file changed, 24 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
> index f51279608fc7..97fac5ea6afd 100644
> --- a/drivers/spi/spi-geni-qcom.c
> +++ b/drivers/spi/spi-geni-qcom.c
> @@ -77,6 +77,7 @@ struct spi_geni_master {
> u32 tx_fifo_depth;
> u32 fifo_width_bits;
> u32 tx_wm;
> + u32 last_mode;
> unsigned long cur_speed_hz;
> unsigned int cur_bits_per_word;
> unsigned int tx_rem_bytes;
> @@ -177,8 +178,6 @@ static void spi_setup_word_len(struct spi_geni_master *mas, u16 mode,
> struct geni_se *se = &mas->se;
> u32 word_len;
>
> - word_len = readl(se->base + SE_SPI_WORD_LEN);
> -
> /*
> * If bits_per_word isn't a byte aligned value, set the packing to be
> * 1 SPI word per FIFO word.
> @@ -187,10 +186,9 @@ static void spi_setup_word_len(struct spi_geni_master *mas, u16 mode,
> pack_words = mas->fifo_width_bits / bits_per_word;
> else
> pack_words = 1;
> - word_len &= ~WORD_LEN_MSK;
> - word_len |= ((bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK);
> geni_se_config_packing(&mas->se, bits_per_word, pack_words, msb_first,
> true, true);
> + word_len = (bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK;
> writel(word_len, se->base + SE_SPI_WORD_LEN);
> }
>
> @@ -238,38 +236,34 @@ static int setup_fifo_params(struct spi_device *spi_slv,
> {
> struct spi_geni_master *mas = spi_master_get_devdata(spi);
> struct geni_se *se = &mas->se;
> - u32 loopback_cfg, cpol, cpha, demux_output_inv;
> + u32 loopback_cfg = 0, cpol = 0, cpha = 0, demux_output_inv = 0;
> u32 demux_sel;
>
> - loopback_cfg = readl(se->base + SE_SPI_LOOPBACK);
> - cpol = readl(se->base + SE_SPI_CPOL);
> - cpha = readl(se->base + SE_SPI_CPHA);
> - demux_output_inv = 0;
> - loopback_cfg &= ~LOOPBACK_MSK;
> - cpol &= ~CPOL;
> - cpha &= ~CPHA;
> + if (mas->last_mode != spi_slv->mode) {
> + if (spi_slv->mode & SPI_LOOP)
> + loopback_cfg = LOOPBACK_ENABLE;
>
> - if (spi_slv->mode & SPI_LOOP)
> - loopback_cfg |= LOOPBACK_ENABLE;
> + if (spi_slv->mode & SPI_CPOL)
> + cpol = CPOL;
>
> - if (spi_slv->mode & SPI_CPOL)
> - cpol |= CPOL;
> + if (spi_slv->mode & SPI_CPHA)
> + cpha = CPHA;
>
> - if (spi_slv->mode & SPI_CPHA)
> - cpha |= CPHA;
> + if (spi_slv->mode & SPI_CS_HIGH)
> + demux_output_inv = BIT(spi_slv->chip_select);
>
> - if (spi_slv->mode & SPI_CS_HIGH)
> - demux_output_inv = BIT(spi_slv->chip_select);
> + demux_sel = spi_slv->chip_select;
> + mas->cur_bits_per_word = spi_slv->bits_per_word;
>
> - demux_sel = spi_slv->chip_select;
> - mas->cur_bits_per_word = spi_slv->bits_per_word;
> + spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word);
> + writel(loopback_cfg, se->base + SE_SPI_LOOPBACK);
> + writel(demux_sel, se->base + SE_SPI_DEMUX_SEL);
> + writel(cpha, se->base + SE_SPI_CPHA);
> + writel(cpol, se->base + SE_SPI_CPOL);
> + writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV);
>
> - spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word);
> - writel(loopback_cfg, se->base + SE_SPI_LOOPBACK);
> - writel(demux_sel, se->base + SE_SPI_DEMUX_SEL);
> - writel(cpha, se->base + SE_SPI_CPHA);
> - writel(cpol, se->base + SE_SPI_CPOL);
> - writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV);
> + mas->last_mode = spi_slv->mode;
> + }
>
> return geni_spi_set_clock_and_bw(mas, spi_slv->max_speed_hz);
> }
Yeah, looks good to me. The default/reset values of these registers are 0,
so we don't have to preserve any bits here.
We can directly update the registers with the required values.
Reviewed-by: Akash Asthana <[email protected]>
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
On Wed, Jul 01, 2020 at 05:45:09PM -0700, Douglas Anderson wrote:
> There's a bunch of overhead in spi-geni-qcom's prepare_message. Get
> rid of it. Before this change spi_geni_prepare_message() took around
> 14.5 us. After this change, spi_geni_prepare_message() takes about
> 1.75 us (as measured by ftrace).
Acked-by: Mark Brown <[email protected]>