From: Thor Thayer <[email protected]>
The current Cadence QSPI driver caused a kernel panic when loading
a Root Filesystem from QSPI. The problem was caused by reading more
bytes than needed because the QSPI operated on 4 bytes at a time.
<snip>
[ 7.947754] spi_nor_read[1048]:from 0x037cad74, len 1 [bfe07fff]
[ 7.956247] cqspi_read[910]:offset 0x58502516, buffer=bfe07fff
[ 7.956247]
[ 7.966046] Unable to handle kernel paging request at virtual
address bfe08002
[ 7.973239] pgd = eebfc000
[ 7.975931] [bfe08002] *pgd=2fffb811, *pte=00000000, *ppte=00000000
</snip>
Notice above how only 1 byte needed to be read but by reading 4 bytes
into the end of a mapped page, a unrecoverable page fault occurred.
This patch uses a temporary buffer to hold the 4 bytes read and then
copies only the bytes required into the buffer. A min() function is
used to limit the length to prevent buffer overflows.
Similar changes were made for the write routine.
Signed-off-by: Thor Thayer <[email protected]>
---
drivers/mtd/spi-nor/cadence-quadspi.c | 39 ++++++++++++++++++++++++++++-------
1 file changed, 31 insertions(+), 8 deletions(-)
diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
index 4b8e9183489a..b22ed982f896 100644
--- a/drivers/mtd/spi-nor/cadence-quadspi.c
+++ b/drivers/mtd/spi-nor/cadence-quadspi.c
@@ -501,7 +501,8 @@ static int cqspi_indirect_read_execute(struct spi_nor *nor, u8 *rxbuf,
void __iomem *reg_base = cqspi->iobase;
void __iomem *ahb_base = cqspi->ahb_base;
unsigned int remaining = n_rx;
- unsigned int bytes_to_read = 0;
+ unsigned int mod_bytes, words_to_read, bytes_to_read = 0;
+ u8 *rxbuf_end = rxbuf + n_rx;
int ret = 0;
writel(from_addr, reg_base + CQSPI_REG_INDIRECTRDSTARTADDR);
@@ -533,9 +534,21 @@ static int cqspi_indirect_read_execute(struct spi_nor *nor, u8 *rxbuf,
bytes_to_read *= cqspi->fifo_width;
bytes_to_read = bytes_to_read > remaining ?
remaining : bytes_to_read;
- ioread32_rep(ahb_base, rxbuf,
- DIV_ROUND_UP(bytes_to_read, 4));
- rxbuf += bytes_to_read;
+ /* Read 4 byte chunks before using single byte mode */
+ words_to_read = bytes_to_read / 4;
+ mod_bytes = bytes_to_read % 4;
+ if (words_to_read) {
+ ioread32_rep(ahb_base, rxbuf, words_to_read);
+ rxbuf += (words_to_read * 4);
+ }
+ if (mod_bytes) {
+ unsigned int temp = ioread32(ahb_base);
+
+ memcpy(rxbuf, &temp, min((unsigned int)
+ (rxbuf_end - rxbuf),
+ mod_bytes));
+ rxbuf += mod_bytes;
+ }
remaining -= bytes_to_read;
bytes_to_read = cqspi_get_rd_sram_level(cqspi);
}
@@ -599,7 +612,7 @@ static int cqspi_indirect_write_execute(struct spi_nor *nor, loff_t to_addr,
struct cqspi_st *cqspi = f_pdata->cqspi;
void __iomem *reg_base = cqspi->iobase;
unsigned int remaining = n_tx;
- unsigned int write_bytes;
+ unsigned int mod_bytes, write_bytes, write_words;
int ret;
writel(to_addr, reg_base + CQSPI_REG_INDIRECTWRSTARTADDR);
@@ -625,9 +638,20 @@ static int cqspi_indirect_write_execute(struct spi_nor *nor, loff_t to_addr,
while (remaining > 0) {
write_bytes = remaining > page_size ? page_size : remaining;
- iowrite32_rep(cqspi->ahb_base, txbuf,
- DIV_ROUND_UP(write_bytes, 4));
+ write_words = write_bytes / 4;
+ mod_bytes = write_bytes % 4;
+ /* Write 4 bytes at a time then single bytes. */
+ if (write_words) {
+ iowrite32_rep(cqspi->ahb_base, txbuf, write_words);
+ txbuf += (write_words * 4);
+ }
+ if (mod_bytes) {
+ unsigned int temp = 0xFFFFFFFF;
+ memcpy(&temp, txbuf, min(sizeof(temp), mod_bytes));
+ iowrite32(temp, cqspi->ahb_base);
+ txbuf += mod_bytes;
+ }
ret = wait_for_completion_timeout(&cqspi->transfer_complete,
msecs_to_jiffies
(CQSPI_TIMEOUT_MS));
@@ -637,7 +661,6 @@ static int cqspi_indirect_write_execute(struct spi_nor *nor, loff_t to_addr,
goto failwr;
}
- txbuf += write_bytes;
remaining -= write_bytes;
if (remaining > 0)
--
2.7.4
On 03/19/2018 07:45 PM, [email protected] wrote:
> From: Thor Thayer <[email protected]>
>
> The current Cadence QSPI driver caused a kernel panic when loading
> a Root Filesystem from QSPI. The problem was caused by reading more
> bytes than needed because the QSPI operated on 4 bytes at a time.
> <snip>
> [ 7.947754] spi_nor_read[1048]:from 0x037cad74, len 1 [bfe07fff]
> [ 7.956247] cqspi_read[910]:offset 0x58502516, buffer=bfe07fff
> [ 7.956247]
> [ 7.966046] Unable to handle kernel paging request at virtual
> address bfe08002
> [ 7.973239] pgd = eebfc000
> [ 7.975931] [bfe08002] *pgd=2fffb811, *pte=00000000, *ppte=00000000
> </snip>
> Notice above how only 1 byte needed to be read but by reading 4 bytes
> into the end of a mapped page, a unrecoverable page fault occurred.
>
> This patch uses a temporary buffer to hold the 4 bytes read and then
> copies only the bytes required into the buffer. A min() function is
> used to limit the length to prevent buffer overflows.
>
> Similar changes were made for the write routine.
>
> Signed-off-by: Thor Thayer <[email protected]>
> ---
> drivers/mtd/spi-nor/cadence-quadspi.c | 39 ++++++++++++++++++++++++++++-------
> 1 file changed, 31 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
> index 4b8e9183489a..b22ed982f896 100644
> --- a/drivers/mtd/spi-nor/cadence-quadspi.c
> +++ b/drivers/mtd/spi-nor/cadence-quadspi.c
> @@ -501,7 +501,8 @@ static int cqspi_indirect_read_execute(struct spi_nor *nor, u8 *rxbuf,
> void __iomem *reg_base = cqspi->iobase;
> void __iomem *ahb_base = cqspi->ahb_base;
> unsigned int remaining = n_rx;
> - unsigned int bytes_to_read = 0;
> + unsigned int mod_bytes, words_to_read, bytes_to_read = 0;
> + u8 *rxbuf_end = rxbuf + n_rx;
> int ret = 0;
>
> writel(from_addr, reg_base + CQSPI_REG_INDIRECTRDSTARTADDR);
> @@ -533,9 +534,21 @@ static int cqspi_indirect_read_execute(struct spi_nor *nor, u8 *rxbuf,
> bytes_to_read *= cqspi->fifo_width;
> bytes_to_read = bytes_to_read > remaining ?
> remaining : bytes_to_read;
> - ioread32_rep(ahb_base, rxbuf,
> - DIV_ROUND_UP(bytes_to_read, 4));
> - rxbuf += bytes_to_read;
> + /* Read 4 byte chunks before using single byte mode */
> + words_to_read = bytes_to_read / 4;
> + mod_bytes = bytes_to_read % 4;
> + if (words_to_read) {
> + ioread32_rep(ahb_base, rxbuf, words_to_read);
> + rxbuf += (words_to_read * 4);
> + }
> + if (mod_bytes) {
> + unsigned int temp = ioread32(ahb_base);
> +
> + memcpy(rxbuf, &temp, min((unsigned int)
> + (rxbuf_end - rxbuf),
> + mod_bytes));
> + rxbuf += mod_bytes;
> + }
Wouldn't it make more sense to read in 4 byte increments all the time
except for the one last read instead ? This code above where you always
check for trailing bytes can make the next read cycle do ioreaad32 into
unaligned memory address and thus cause slowdown. (consider the example
where the controller first reports it has 5 bytes ready, then reports it
has 7 bytes ready. If you read 4 bytes in the first cycle, wait a bit
and then check how much data the controller has in the next cycle, it
will be 8 bytes, all nicely aligned).
Does it make sense ?
--
Best regards,
Marek Vasut
Hi Thor,
I love your patch! Perhaps something to improve:
[auto build test WARNING on v4.16-rc4]
[also build test WARNING on next-20180319]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/thor-thayer-linux-intel-com/mtd-spi-nor-Fix-Cadence-QSPI-page-fault-kernel-panic/20180320-133857
config: alpha-allmodconfig (attached as .config)
compiler: alpha-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=alpha
All warnings (new ones prefixed by >>):
In file included from include/linux/clk.h:16:0,
from drivers/mtd/spi-nor/cadence-quadspi.c:18:
drivers/mtd/spi-nor/cadence-quadspi.c: In function 'cqspi_indirect_write_execute':
include/linux/kernel.h:793:16: warning: comparison of distinct pointer types lacks a cast
(void) (&min1 == &min2); \
^
include/linux/kernel.h:802:2: note: in expansion of macro '__min'
__min(typeof(x), typeof(y), \
^~~~~
>> drivers/mtd/spi-nor/cadence-quadspi.c:651:25: note: in expansion of macro 'min'
memcpy(&temp, txbuf, min(sizeof(temp), mod_bytes));
^~~
vim +/min +651 drivers/mtd/spi-nor/cadence-quadspi.c
606
607 static int cqspi_indirect_write_execute(struct spi_nor *nor, loff_t to_addr,
608 const u8 *txbuf, const size_t n_tx)
609 {
610 const unsigned int page_size = nor->page_size;
611 struct cqspi_flash_pdata *f_pdata = nor->priv;
612 struct cqspi_st *cqspi = f_pdata->cqspi;
613 void __iomem *reg_base = cqspi->iobase;
614 unsigned int remaining = n_tx;
615 unsigned int mod_bytes, write_bytes, write_words;
616 int ret;
617
618 writel(to_addr, reg_base + CQSPI_REG_INDIRECTWRSTARTADDR);
619 writel(remaining, reg_base + CQSPI_REG_INDIRECTWRBYTES);
620
621 /* Clear all interrupts. */
622 writel(CQSPI_IRQ_STATUS_MASK, reg_base + CQSPI_REG_IRQSTATUS);
623
624 writel(CQSPI_IRQ_MASK_WR, reg_base + CQSPI_REG_IRQMASK);
625
626 reinit_completion(&cqspi->transfer_complete);
627 writel(CQSPI_REG_INDIRECTWR_START_MASK,
628 reg_base + CQSPI_REG_INDIRECTWR);
629 /*
630 * As per 66AK2G02 TRM SPRUHY8F section 11.15.5.3 Indirect Access
631 * Controller programming sequence, couple of cycles of
632 * QSPI_REF_CLK delay is required for the above bit to
633 * be internally synchronized by the QSPI module. Provide 5
634 * cycles of delay.
635 */
636 if (cqspi->wr_delay)
637 ndelay(cqspi->wr_delay);
638
639 while (remaining > 0) {
640 write_bytes = remaining > page_size ? page_size : remaining;
641 write_words = write_bytes / 4;
642 mod_bytes = write_bytes % 4;
643 /* Write 4 bytes at a time then single bytes. */
644 if (write_words) {
645 iowrite32_rep(cqspi->ahb_base, txbuf, write_words);
646 txbuf += (write_words * 4);
647 }
648 if (mod_bytes) {
649 unsigned int temp = 0xFFFFFFFF;
650
> 651 memcpy(&temp, txbuf, min(sizeof(temp), mod_bytes));
652 iowrite32(temp, cqspi->ahb_base);
653 txbuf += mod_bytes;
654 }
655 ret = wait_for_completion_timeout(&cqspi->transfer_complete,
656 msecs_to_jiffies
657 (CQSPI_TIMEOUT_MS));
658 if (!ret) {
659 dev_err(nor->dev, "Indirect write timeout\n");
660 ret = -ETIMEDOUT;
661 goto failwr;
662 }
663
664 remaining -= write_bytes;
665
666 if (remaining > 0)
667 reinit_completion(&cqspi->transfer_complete);
668 }
669
670 /* Check indirect done status */
671 ret = cqspi_wait_for_bit(reg_base + CQSPI_REG_INDIRECTWR,
672 CQSPI_REG_INDIRECTWR_DONE_MASK, 0);
673 if (ret) {
674 dev_err(nor->dev,
675 "Indirect write completion error (%i)\n", ret);
676 goto failwr;
677 }
678
679 /* Disable interrupt. */
680 writel(0, reg_base + CQSPI_REG_IRQMASK);
681
682 /* Clear indirect completion status */
683 writel(CQSPI_REG_INDIRECTWR_DONE_MASK, reg_base + CQSPI_REG_INDIRECTWR);
684
685 cqspi_wait_idle(cqspi);
686
687 return 0;
688
689 failwr:
690 /* Disable interrupt. */
691 writel(0, reg_base + CQSPI_REG_IRQMASK);
692
693 /* Cancel the indirect write */
694 writel(CQSPI_REG_INDIRECTWR_CANCEL_MASK,
695 reg_base + CQSPI_REG_INDIRECTWR);
696 return ret;
697 }
698
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
Hi Thor,
I love your patch! Perhaps something to improve:
[auto build test WARNING on v4.16-rc4]
[also build test WARNING on next-20180319]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/thor-thayer-linux-intel-com/mtd-spi-nor-Fix-Cadence-QSPI-page-fault-kernel-panic/20180320-133857
reproduce:
# apt-get install sparse
make ARCH=x86_64 allmodconfig
make C=1 CF=-D__CHECK_ENDIAN__
sparse warnings: (new ones prefixed by >>)
>> drivers/mtd/spi-nor/cadence-quadspi.c:651:25: sparse: incompatible types in comparison expression (different type sizes)
In file included from include/linux/clk.h:16:0,
from drivers/mtd/spi-nor/cadence-quadspi.c:18:
drivers/mtd/spi-nor/cadence-quadspi.c: In function 'cqspi_indirect_write_execute':
include/linux/kernel.h:793:16: warning: comparison of distinct pointer types lacks a cast
(void) (&min1 == &min2); 7- ^
include/linux/kernel.h:802:2: note: in expansion of macro '__min'
__min(typeof(x), typeof(y), 10- ^~~~~
drivers/mtd/spi-nor/cadence-quadspi.c:651:25: note: in expansion of macro 'min'
memcpy(&temp, txbuf, min(sizeof(temp), mod_bytes));
^~~
vim +651 drivers/mtd/spi-nor/cadence-quadspi.c
606
607 static int cqspi_indirect_write_execute(struct spi_nor *nor, loff_t to_addr,
608 const u8 *txbuf, const size_t n_tx)
609 {
610 const unsigned int page_size = nor->page_size;
611 struct cqspi_flash_pdata *f_pdata = nor->priv;
612 struct cqspi_st *cqspi = f_pdata->cqspi;
613 void __iomem *reg_base = cqspi->iobase;
614 unsigned int remaining = n_tx;
615 unsigned int mod_bytes, write_bytes, write_words;
616 int ret;
617
618 writel(to_addr, reg_base + CQSPI_REG_INDIRECTWRSTARTADDR);
619 writel(remaining, reg_base + CQSPI_REG_INDIRECTWRBYTES);
620
621 /* Clear all interrupts. */
622 writel(CQSPI_IRQ_STATUS_MASK, reg_base + CQSPI_REG_IRQSTATUS);
623
624 writel(CQSPI_IRQ_MASK_WR, reg_base + CQSPI_REG_IRQMASK);
625
626 reinit_completion(&cqspi->transfer_complete);
627 writel(CQSPI_REG_INDIRECTWR_START_MASK,
628 reg_base + CQSPI_REG_INDIRECTWR);
629 /*
630 * As per 66AK2G02 TRM SPRUHY8F section 11.15.5.3 Indirect Access
631 * Controller programming sequence, couple of cycles of
632 * QSPI_REF_CLK delay is required for the above bit to
633 * be internally synchronized by the QSPI module. Provide 5
634 * cycles of delay.
635 */
636 if (cqspi->wr_delay)
637 ndelay(cqspi->wr_delay);
638
639 while (remaining > 0) {
640 write_bytes = remaining > page_size ? page_size : remaining;
641 write_words = write_bytes / 4;
642 mod_bytes = write_bytes % 4;
643 /* Write 4 bytes at a time then single bytes. */
644 if (write_words) {
645 iowrite32_rep(cqspi->ahb_base, txbuf, write_words);
646 txbuf += (write_words * 4);
647 }
648 if (mod_bytes) {
649 unsigned int temp = 0xFFFFFFFF;
650
> 651 memcpy(&temp, txbuf, min(sizeof(temp), mod_bytes));
652 iowrite32(temp, cqspi->ahb_base);
653 txbuf += mod_bytes;
654 }
655 ret = wait_for_completion_timeout(&cqspi->transfer_complete,
656 msecs_to_jiffies
657 (CQSPI_TIMEOUT_MS));
658 if (!ret) {
659 dev_err(nor->dev, "Indirect write timeout\n");
660 ret = -ETIMEDOUT;
661 goto failwr;
662 }
663
664 remaining -= write_bytes;
665
666 if (remaining > 0)
667 reinit_completion(&cqspi->transfer_complete);
668 }
669
670 /* Check indirect done status */
671 ret = cqspi_wait_for_bit(reg_base + CQSPI_REG_INDIRECTWR,
672 CQSPI_REG_INDIRECTWR_DONE_MASK, 0);
673 if (ret) {
674 dev_err(nor->dev,
675 "Indirect write completion error (%i)\n", ret);
676 goto failwr;
677 }
678
679 /* Disable interrupt. */
680 writel(0, reg_base + CQSPI_REG_IRQMASK);
681
682 /* Clear indirect completion status */
683 writel(CQSPI_REG_INDIRECTWR_DONE_MASK, reg_base + CQSPI_REG_INDIRECTWR);
684
685 cqspi_wait_idle(cqspi);
686
687 return 0;
688
689 failwr:
690 /* Disable interrupt. */
691 writel(0, reg_base + CQSPI_REG_IRQMASK);
692
693 /* Cancel the indirect write */
694 writel(CQSPI_REG_INDIRECTWR_CANCEL_MASK,
695 reg_base + CQSPI_REG_INDIRECTWR);
696 return ret;
697 }
698
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
Hi Marek,
On 03/19/2018 05:33 PM, Marek Vasut wrote:
> On 03/19/2018 07:45 PM, [email protected] wrote:
>> From: Thor Thayer <[email protected]>
>>
>> The current Cadence QSPI driver caused a kernel panic when loading
>> a Root Filesystem from QSPI. The problem was caused by reading more
>> bytes than needed because the QSPI operated on 4 bytes at a time.
>> <snip>
>> [ 7.947754] spi_nor_read[1048]:from 0x037cad74, len 1 [bfe07fff]
>> [ 7.956247] cqspi_read[910]:offset 0x58502516, buffer=bfe07fff
>> [ 7.956247]
>> [ 7.966046] Unable to handle kernel paging request at virtual
>> address bfe08002
>> [ 7.973239] pgd = eebfc000
>> [ 7.975931] [bfe08002] *pgd=2fffb811, *pte=00000000, *ppte=00000000
>> </snip>
>> Notice above how only 1 byte needed to be read but by reading 4 bytes
>> into the end of a mapped page, a unrecoverable page fault occurred.
>>
>> This patch uses a temporary buffer to hold the 4 bytes read and then
>> copies only the bytes required into the buffer. A min() function is
>> used to limit the length to prevent buffer overflows.
>>
>> Similar changes were made for the write routine.
>>
>> Signed-off-by: Thor Thayer <[email protected]>
>> ---
>> drivers/mtd/spi-nor/cadence-quadspi.c | 39 ++++++++++++++++++++++++++++-------
>> 1 file changed, 31 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
>> index 4b8e9183489a..b22ed982f896 100644
>> --- a/drivers/mtd/spi-nor/cadence-quadspi.c
>> +++ b/drivers/mtd/spi-nor/cadence-quadspi.c
>> @@ -501,7 +501,8 @@ static int cqspi_indirect_read_execute(struct spi_nor *nor, u8 *rxbuf,
>> void __iomem *reg_base = cqspi->iobase;
>> void __iomem *ahb_base = cqspi->ahb_base;
>> unsigned int remaining = n_rx;
>> - unsigned int bytes_to_read = 0;
>> + unsigned int mod_bytes, words_to_read, bytes_to_read = 0;
>> + u8 *rxbuf_end = rxbuf + n_rx;
>> int ret = 0;
>>
>> writel(from_addr, reg_base + CQSPI_REG_INDIRECTRDSTARTADDR);
>> @@ -533,9 +534,21 @@ static int cqspi_indirect_read_execute(struct spi_nor *nor, u8 *rxbuf,
>> bytes_to_read *= cqspi->fifo_width;
>> bytes_to_read = bytes_to_read > remaining ?
>> remaining : bytes_to_read;
>> - ioread32_rep(ahb_base, rxbuf,
>> - DIV_ROUND_UP(bytes_to_read, 4));
>> - rxbuf += bytes_to_read;
>> + /* Read 4 byte chunks before using single byte mode */
>> + words_to_read = bytes_to_read / 4;
>> + mod_bytes = bytes_to_read % 4;
>> + if (words_to_read) {
>> + ioread32_rep(ahb_base, rxbuf, words_to_read);
>> + rxbuf += (words_to_read * 4);
>> + }
>> + if (mod_bytes) {
>> + unsigned int temp = ioread32(ahb_base);
>> +
>> + memcpy(rxbuf, &temp, min((unsigned int)
>> + (rxbuf_end - rxbuf),
>> + mod_bytes));
>> + rxbuf += mod_bytes;
>> + }
>
> Wouldn't it make more sense to read in 4 byte increments all the time
> except for the one last read instead ? This code above where you always
> check for trailing bytes can make the next read cycle do ioreaad32 into
> unaligned memory address and thus cause slowdown. (consider the example
> where the controller first reports it has 5 bytes ready, then reports it
> has 7 bytes ready. If you read 4 bytes in the first cycle, wait a bit
> and then check how much data the controller has in the next cycle, it
> will be 8 bytes, all nicely aligned).
>
> Does it make sense ?
>
Ahh yes, Thanks for the example - I see your point. I'll look into that
change. Thanks.