In preparation for supporting multiple BCMBCA SoCs, rename bcm63138 to
bcmbca in the driver code and in the driver file name.
Signed-off-by: William Zhang <[email protected]>
Acked-by: Florian Fainelli <[email protected]>
---
drivers/mtd/nand/raw/brcmnand/Makefile | 2 +-
drivers/mtd/nand/raw/brcmnand/bcm63138_nand.c | 101 ------------------
drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c | 101 ++++++++++++++++++
3 files changed, 102 insertions(+), 102 deletions(-)
delete mode 100644 drivers/mtd/nand/raw/brcmnand/bcm63138_nand.c
create mode 100644 drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
diff --git a/drivers/mtd/nand/raw/brcmnand/Makefile b/drivers/mtd/nand/raw/brcmnand/Makefile
index 9907e3ec4bb2..0536568c6467 100644
--- a/drivers/mtd/nand/raw/brcmnand/Makefile
+++ b/drivers/mtd/nand/raw/brcmnand/Makefile
@@ -2,7 +2,7 @@
# link order matters; don't link the more generic brcmstb_nand.o before the
# more specific iproc_nand.o, for instance
obj-$(CONFIG_MTD_NAND_BRCMNAND_IPROC) += iproc_nand.o
-obj-$(CONFIG_MTD_NAND_BRCMNAND_BCMBCA) += bcm63138_nand.o
+obj-$(CONFIG_MTD_NAND_BRCMNAND_BCMBCA) += bcmbca_nand.o
obj-$(CONFIG_MTD_NAND_BRCMNAND_BCM63XX) += bcm6368_nand.o
obj-$(CONFIG_MTD_NAND_BRCMNAND_BRCMSTB) += brcmstb_nand.o
obj-$(CONFIG_MTD_NAND_BRCMNAND) += brcmnand.o
diff --git a/drivers/mtd/nand/raw/brcmnand/bcm63138_nand.c b/drivers/mtd/nand/raw/brcmnand/bcm63138_nand.c
deleted file mode 100644
index 71ddcc611f6e..000000000000
--- a/drivers/mtd/nand/raw/brcmnand/bcm63138_nand.c
+++ /dev/null
@@ -1,101 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright © 2015 Broadcom Corporation
- */
-
-#include <linux/device.h>
-#include <linux/io.h>
-#include <linux/ioport.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-
-#include "brcmnand.h"
-
-struct bcm63138_nand_soc {
- struct brcmnand_soc soc;
- void __iomem *base;
-};
-
-#define BCM63138_NAND_INT_STATUS 0x00
-#define BCM63138_NAND_INT_EN 0x04
-
-enum {
- BCM63138_CTLRDY = BIT(4),
-};
-
-static bool bcm63138_nand_intc_ack(struct brcmnand_soc *soc)
-{
- struct bcm63138_nand_soc *priv =
- container_of(soc, struct bcm63138_nand_soc, soc);
- void __iomem *mmio = priv->base + BCM63138_NAND_INT_STATUS;
- u32 val = brcmnand_readl(mmio);
-
- if (val & BCM63138_CTLRDY) {
- brcmnand_writel(val & ~BCM63138_CTLRDY, mmio);
- return true;
- }
-
- return false;
-}
-
-static void bcm63138_nand_intc_set(struct brcmnand_soc *soc, bool en)
-{
- struct bcm63138_nand_soc *priv =
- container_of(soc, struct bcm63138_nand_soc, soc);
- void __iomem *mmio = priv->base + BCM63138_NAND_INT_EN;
- u32 val = brcmnand_readl(mmio);
-
- if (en)
- val |= BCM63138_CTLRDY;
- else
- val &= ~BCM63138_CTLRDY;
-
- brcmnand_writel(val, mmio);
-}
-
-static int bcm63138_nand_probe(struct platform_device *pdev)
-{
- struct device *dev = &pdev->dev;
- struct bcm63138_nand_soc *priv;
- struct brcmnand_soc *soc;
- struct resource *res;
-
- priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
- soc = &priv->soc;
-
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand-int-base");
- priv->base = devm_ioremap_resource(dev, res);
- if (IS_ERR(priv->base))
- return PTR_ERR(priv->base);
-
- soc->ctlrdy_ack = bcm63138_nand_intc_ack;
- soc->ctlrdy_set_enabled = bcm63138_nand_intc_set;
-
- return brcmnand_probe(pdev, soc);
-}
-
-static const struct of_device_id bcm63138_nand_of_match[] = {
- { .compatible = "brcm,nand-bcm63138" },
- {},
-};
-MODULE_DEVICE_TABLE(of, bcm63138_nand_of_match);
-
-static struct platform_driver bcm63138_nand_driver = {
- .probe = bcm63138_nand_probe,
- .remove = brcmnand_remove,
- .driver = {
- .name = "bcm63138_nand",
- .pm = &brcmnand_pm_ops,
- .of_match_table = bcm63138_nand_of_match,
- }
-};
-module_platform_driver(bcm63138_nand_driver);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Brian Norris");
-MODULE_DESCRIPTION("NAND driver for BCM63138");
diff --git a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
new file mode 100644
index 000000000000..f51f857eeea6
--- /dev/null
+++ b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2015 Broadcom Corporation
+ */
+
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "brcmnand.h"
+
+struct bcmbca_nand_soc {
+ struct brcmnand_soc soc;
+ void __iomem *base;
+};
+
+#define BCMBCA_NAND_INT_STATUS 0x00
+#define BCMBCA_NAND_INT_EN 0x04
+
+enum {
+ BCMBCA_CTLRDY = BIT(4),
+};
+
+static bool bcmbca_nand_intc_ack(struct brcmnand_soc *soc)
+{
+ struct bcmbca_nand_soc *priv =
+ container_of(soc, struct bcmbca_nand_soc, soc);
+ void __iomem *mmio = priv->base + BCMBCA_NAND_INT_STATUS;
+ u32 val = brcmnand_readl(mmio);
+
+ if (val & BCMBCA_CTLRDY) {
+ brcmnand_writel(val & ~BCMBCA_CTLRDY, mmio);
+ return true;
+ }
+
+ return false;
+}
+
+static void bcmbca_nand_intc_set(struct brcmnand_soc *soc, bool en)
+{
+ struct bcmbca_nand_soc *priv =
+ container_of(soc, struct bcmbca_nand_soc, soc);
+ void __iomem *mmio = priv->base + BCMBCA_NAND_INT_EN;
+ u32 val = brcmnand_readl(mmio);
+
+ if (en)
+ val |= BCMBCA_CTLRDY;
+ else
+ val &= ~BCMBCA_CTLRDY;
+
+ brcmnand_writel(val, mmio);
+}
+
+static int bcmbca_nand_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct bcmbca_nand_soc *priv;
+ struct brcmnand_soc *soc;
+ struct resource *res;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+ soc = &priv->soc;
+
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand-int-base");
+ priv->base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(priv->base))
+ return PTR_ERR(priv->base);
+
+ soc->ctlrdy_ack = bcmbca_nand_intc_ack;
+ soc->ctlrdy_set_enabled = bcmbca_nand_intc_set;
+
+ return brcmnand_probe(pdev, soc);
+}
+
+static const struct of_device_id bcmbca_nand_of_match[] = {
+ { .compatible = "brcm,nand-bcm63138" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, bcmbca_nand_of_match);
+
+static struct platform_driver bcmbca_nand_driver = {
+ .probe = bcmbca_nand_probe,
+ .remove = brcmnand_remove,
+ .driver = {
+ .name = "bcmbca_nand",
+ .pm = &brcmnand_pm_ops,
+ .of_match_table = bcmbca_nand_of_match,
+ }
+};
+module_platform_driver(bcmbca_nand_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Brian Norris");
+MODULE_DESCRIPTION("NAND driver for BCMBCA");
--
2.37.3
Hi William,
[email protected] wrote on Tue, 6 Jun 2023 16:12:51 -0700:
> BCMBCA broadband SoC based board designs do not specify the ecc setting in
> the dts but rather use the SoC NAND strap info to obtain the ecc strength
> and spare area size setting. Add the brcm,nand-ecc-use-strap dts property for
> this purpose and update the driver to support this option.
>
> The generic nand ecc settings still take precedence over this flag. For
> example, if nand-ecc-strength is set in the dts, the driver ignores the
> strap setting and falls back to the original behavior. This makes sure that
> the existing BCMBCA board dts still works the old way even if the strap
> flag is set in the BCMBCA chip dtsi.
>
> Signed-off-by: William Zhang <[email protected]>
> ---
>
> drivers/mtd/nand/raw/brcmnand/brcmnand.c | 72 +++++++++++++++++++++---
> 1 file changed, 64 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> index 656be4d73016..8c7cea36ac71 100644
> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> @@ -1076,6 +1076,38 @@ static void brcmnand_set_sector_size_1k(struct brcmnand_host *host, int val)
> nand_writereg(ctrl, acc_control_offs, tmp);
> }
>
> +static int brcmnand_get_spare_size(struct brcmnand_host *host)
> +{
> + struct brcmnand_controller *ctrl = host->ctrl;
> + u16 acc_control_offs = brcmnand_cs_offset(ctrl, host->cs,
> + BRCMNAND_CS_ACC_CONTROL);
> + u32 acc = nand_readreg(ctrl, acc_control_offs);
> +
> + return (acc&brcmnand_spare_area_mask(ctrl));
> +}
> +
> +static int brcmnand_get_ecc_strength(struct brcmnand_host *host)
> +{
> + struct brcmnand_controller *ctrl = host->ctrl;
> + u16 acc_control_offs = brcmnand_cs_offset(ctrl, host->cs,
> + BRCMNAND_CS_ACC_CONTROL);
> + int sector_size_1k = brcmnand_get_sector_size_1k(host);
> + int spare_area_size, ecc_level, ecc_strength;
> + u32 acc;
> +
> + spare_area_size = brcmnand_get_spare_size(host);
> + acc = nand_readreg(ctrl, acc_control_offs);
> + ecc_level = (acc & brcmnand_ecc_level_mask(ctrl)) >> brcmnand_ecc_level_shift(ctrl);
> + if (sector_size_1k)
> + ecc_strength = ecc_level<<1;
?
If you mean "x2" then let the compiler do that.
> + else if (spare_area_size == 16 && ecc_level == 15)
> + ecc_strength = 1; /* hamming */
> + else
> + ecc_strength = ecc_level;
> +
> + return ecc_strength;
> +}
> +
> /***********************************************************************
> * CS_NAND_SELECT
> ***********************************************************************/
> @@ -2656,19 +2688,43 @@ static int brcmnand_setup_dev(struct brcmnand_host *host)
> nanddev_get_ecc_requirements(&chip->base);
> struct brcmnand_controller *ctrl = host->ctrl;
> struct brcmnand_cfg *cfg = &host->hwcfg;
> - char msg[128];
> + struct device_node *np = nand_get_flash_node(chip);
> u32 offs, tmp, oob_sector;
> - int ret;
> + int ret, sector_size_1k = 0;
> + bool use_strap = false;
> + char msg[128];
>
> memset(cfg, 0, sizeof(*cfg));
> + use_strap = of_property_read_bool(np, "brcm,nand-ecc-use-strap");
> +
> + /*
> + * Set ECC size and strength based on hw configuration from strap
> + * if device tree does not specify them and use strap property is set
> + * If ecc strength is set in dts, don't use strap setting.
> + */
> + if (chip->ecc.strength)
> + use_strap = 0;
> +
> + if (use_strap) {
> + chip->ecc.strength = brcmnand_get_ecc_strength(host);
> + sector_size_1k = brcmnand_get_sector_size_1k(host);
> + if (chip->ecc.size == 0) {
> + if (sector_size_1k < 0)
> + chip->ecc.size = 512;
> + else
> + chip->ecc.size = 512<<sector_size_1k;
Please run checkpatch.pl --strict
> + }
> + }
>
> - ret = of_property_read_u32(nand_get_flash_node(chip),
> - "brcm,nand-oob-sector-size",
> - &oob_sector);
> + ret = of_property_read_u32(np, "brcm,nand-oob-sector-size",
> + &oob_sector);
> if (ret) {
> - /* Use detected size */
> - cfg->spare_area_size = mtd->oobsize /
> - (mtd->writesize >> FC_SHIFT);
> + if (use_strap)
> + cfg->spare_area_size = brcmnand_get_spare_size(host);
> + else
> + /* Use detected size */
> + cfg->spare_area_size = mtd->oobsize /
> + (mtd->writesize >> FC_SHIFT);
> } else {
> cfg->spare_area_size = oob_sector;
> }
Thanks,
Miquèl
Hi William,
[email protected] wrote on Tue, 6 Jun 2023 16:12:44 -0700:
> When the oob buffer length is not in multiple of words, the oob write
> function does out-of-bounds read on the oob source buffer at the last
> iteration. Fix that by always checking length limit on the oob buffer
> read and fill with 0xff when reaching the end of the buffer to the oob
> registers.
>
> Fixes: 27c5b17cd1b1 ("mtd: nand: add NAND driver "library" for Broadcom STB NAND controller")
> Signed-off-by: William Zhang <[email protected]>
> Reviewed-by: Florian Fainelli <[email protected]>
> ---
>
> drivers/mtd/nand/raw/brcmnand/brcmnand.c | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> index 20832857c4aa..d920e88c7f5b 100644
> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> @@ -1486,10 +1486,10 @@ static int write_oob_to_regs(struct brcmnand_controller *ctrl, int i,
>
> for (j = 0; j < tbytes; j += 4)
> oob_reg_write(ctrl, j,
> - (oob[j + 0] << 24) |
> - (oob[j + 1] << 16) |
> - (oob[j + 2] << 8) |
> - (oob[j + 3] << 0));
> + (((j < tbytes) ? oob[j] : 0xff) << 24) |
> + (((j + 1 < tbytes) ? oob[j + 1] : 0xff) << 16) |
> + (((j + 2 < tbytes) ? oob[j + 2] : 0xff) << 8) |
> + ((j + 3 < tbytes) ? oob[j + 3] : 0xff));
This is a lot of additional operations which most of the time are not
relevant. I would instead go for one less iteration in the for loop
when there is unaligned data, and then a dedicated if/else to fill the
missing bytes.
> return tbytes;
> }
>
Thanks,
Miquèl
Hi William,
[email protected] wrote on Tue, 6 Jun 2023 16:12:50 -0700:
> The BCMBCA broadband SoC integrates the NAND controller differently than
> STB, iProc and other SoCs. It has different endianness for NAND cache
> data and ONFI parameter data.
>
> Add a SoC read data bus shim for BCMBCA to meet the specific SoC need
> and performance improvement using the optimized memcpy function on NAND
> cache memory.
>
> Signed-off-by: William Zhang <[email protected]>
> ---
>
> drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c | 36 +++++++++++++++++
> drivers/mtd/nand/raw/brcmnand/brcmnand.c | 44 ++++++++++++++-------
> drivers/mtd/nand/raw/brcmnand/brcmnand.h | 2 +
> 3 files changed, 68 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> index 7e48b6a0bfa2..899103a62c98 100644
> --- a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> +++ b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> @@ -26,6 +26,18 @@ enum {
> BCMBCA_CTLRDY = BIT(4),
> };
>
> +#if defined(CONFIG_ARM64)
> +#define ALIGN_REQ 8
> +#else
> +#define ALIGN_REQ 4
> +#endif
> +
> +static inline bool bcmbca_nand_is_buf_aligned(void *flash_cache, void *buffer)
> +{
> + return IS_ALIGNED((uintptr_t)buffer, ALIGN_REQ) &&
> + IS_ALIGNED((uintptr_t)flash_cache, ALIGN_REQ);
> +}
> +
> static bool bcmbca_nand_intc_ack(struct brcmnand_soc *soc)
> {
> struct bcmbca_nand_soc *priv =
> @@ -56,6 +68,29 @@ static void bcmbca_nand_intc_set(struct brcmnand_soc *soc, bool en)
> brcmnand_writel(val, mmio);
> }
>
> +static void bcmbca_read_data_bus(struct brcmnand_soc *soc,
> + void __iomem *flash_cache, u32 *buffer,
> + int fc_words, bool is_param)
> +{
> + int i;
> +
> + if (!is_param) {
> + /*
> + * memcpy can do unaligned aligned access depending on source
> + * and dest address, which is incompatible with nand cache. Fallback
> + * to the memcpy for io version
> + */
> + if (bcmbca_nand_is_buf_aligned(flash_cache, buffer))
> + memcpy((void *)buffer, (void *)flash_cache, fc_words * 4);
> + else
> + memcpy_fromio((void *)buffer, (void *)flash_cache, fc_words * 4);
> + } else {
> + /* Flash cache has same endian as the host for parameter pages */
> + for (i = 0; i < fc_words; i++, buffer++)
> + *buffer = __raw_readl(flash_cache + i * 4);
> + }
> +}
> +
> static int bcmbca_nand_probe(struct platform_device *pdev)
> {
> struct device *dev = &pdev->dev;
> @@ -75,6 +110,7 @@ static int bcmbca_nand_probe(struct platform_device *pdev)
>
> soc->ctlrdy_ack = bcmbca_nand_intc_ack;
> soc->ctlrdy_set_enabled = bcmbca_nand_intc_set;
> + soc->read_data_bus = bcmbca_read_data_bus;
>
> return brcmnand_probe(pdev, soc);
> }
> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> index d920e88c7f5b..656be4d73016 100644
> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> @@ -814,6 +814,30 @@ static inline u32 edu_readl(struct brcmnand_controller *ctrl,
> return brcmnand_readl(ctrl->edu_base + offs);
> }
>
> +static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl,
> + void __iomem *flash_cache, u32 *buffer,
> + int fc_words, bool is_param)
> +{
> + struct brcmnand_soc *soc = ctrl->soc;
> + int i;
> +
> + if (soc->read_data_bus) {
> + soc->read_data_bus(soc, flash_cache, buffer, fc_words, is_param);
> + } else {
> + if (!is_param) {
> + for (i = 0; i < fc_words; i++, buffer++)
> + *buffer = brcmnand_read_fc(ctrl, i);
> + } else {
> + for (i = 0; i < fc_words; i++)
> + /*
> + * Flash cache is big endian for parameter pages, at
> + * least on STB SoCs
> + */
> + buffer[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
> + }
> + }
Perhaps we could have a single function that is statically assigned at
probe time instead of a first helper with two conditions which calls in
one case another hook... This can be simplified I guess.
> +}
> +
> static void brcmnand_clear_ecc_addr(struct brcmnand_controller *ctrl)
> {
>
> @@ -1811,20 +1835,11 @@ static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
> native_cmd == CMD_PARAMETER_CHANGE_COL) {
> /* Copy flash cache word-wise */
> u32 *flash_cache = (u32 *)ctrl->flash_cache;
> - int i;
>
> brcmnand_soc_data_bus_prepare(ctrl->soc, true);
>
> - /*
> - * Must cache the FLASH_CACHE now, since changes in
> - * SECTOR_SIZE_1K may invalidate it
> - */
> - for (i = 0; i < FC_WORDS; i++)
> - /*
> - * Flash cache is big endian for parameter pages, at
> - * least on STB SoCs
> - */
> - flash_cache[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, flash_cache,
> + FC_WORDS, true);
>
> brcmnand_soc_data_bus_unprepare(ctrl->soc, true);
>
> @@ -2137,7 +2152,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
> {
> struct brcmnand_host *host = nand_get_controller_data(chip);
> struct brcmnand_controller *ctrl = host->ctrl;
> - int i, j, ret = 0;
> + int i, ret = 0;
>
> brcmnand_clear_ecc_addr(ctrl);
>
> @@ -2150,8 +2165,9 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
> if (likely(buf)) {
> brcmnand_soc_data_bus_prepare(ctrl->soc, false);
>
> - for (j = 0; j < FC_WORDS; j++, buf++)
> - *buf = brcmnand_read_fc(ctrl, j);
> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, buf,
> + FC_WORDS, false);
> + buf += FC_WORDS;
>
> brcmnand_soc_data_bus_unprepare(ctrl->soc, false);
> }
> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.h b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> index f1f93d85f50d..88819bc395f8 100644
> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> @@ -24,6 +24,8 @@ struct brcmnand_soc {
> void (*ctlrdy_set_enabled)(struct brcmnand_soc *soc, bool en);
> void (*prepare_data_bus)(struct brcmnand_soc *soc, bool prepare,
> bool is_param);
> + void (*read_data_bus)(struct brcmnand_soc *soc, void __iomem *flash_cache,
> + u32 *buffer, int fc_words, bool is_param);
> const struct brcmnand_io_ops *ops;
> };
>
Thanks,
Miquèl
Hi William,
[email protected] wrote on Tue, 6 Jun 2023 16:12:50 -0700:
> The BCMBCA broadband SoC integrates the NAND controller differently than
> STB, iProc and other SoCs. It has different endianness for NAND cache
> data and ONFI parameter data.
>
> Add a SoC read data bus shim for BCMBCA to meet the specific SoC need
> and performance improvement using the optimized memcpy function on NAND
> cache memory.
>
> Signed-off-by: William Zhang <[email protected]>
> ---
>
> drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c | 36 +++++++++++++++++
> drivers/mtd/nand/raw/brcmnand/brcmnand.c | 44 ++++++++++++++-------
> drivers/mtd/nand/raw/brcmnand/brcmnand.h | 2 +
> 3 files changed, 68 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> index 7e48b6a0bfa2..899103a62c98 100644
> --- a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> +++ b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> @@ -26,6 +26,18 @@ enum {
> BCMBCA_CTLRDY = BIT(4),
> };
>
> +#if defined(CONFIG_ARM64)
> +#define ALIGN_REQ 8
> +#else
> +#define ALIGN_REQ 4
> +#endif
> +
> +static inline bool bcmbca_nand_is_buf_aligned(void *flash_cache, void *buffer)
> +{
> + return IS_ALIGNED((uintptr_t)buffer, ALIGN_REQ) &&
> + IS_ALIGNED((uintptr_t)flash_cache, ALIGN_REQ);
> +}
> +
> static bool bcmbca_nand_intc_ack(struct brcmnand_soc *soc)
> {
> struct bcmbca_nand_soc *priv =
> @@ -56,6 +68,29 @@ static void bcmbca_nand_intc_set(struct brcmnand_soc *soc, bool en)
> brcmnand_writel(val, mmio);
> }
>
> +static void bcmbca_read_data_bus(struct brcmnand_soc *soc,
> + void __iomem *flash_cache, u32 *buffer,
> + int fc_words, bool is_param)
> +{
> + int i;
> +
> + if (!is_param) {
> + /*
> + * memcpy can do unaligned aligned access depending on source
> + * and dest address, which is incompatible with nand cache. Fallback
> + * to the memcpy for io version
> + */
> + if (bcmbca_nand_is_buf_aligned(flash_cache, buffer))
> + memcpy((void *)buffer, (void *)flash_cache, fc_words * 4);
> + else
> + memcpy_fromio((void *)buffer, (void *)flash_cache, fc_words * 4);
> + } else {
> + /* Flash cache has same endian as the host for parameter pages */
> + for (i = 0; i < fc_words; i++, buffer++)
> + *buffer = __raw_readl(flash_cache + i * 4);
> + }
> +}
> +
> static int bcmbca_nand_probe(struct platform_device *pdev)
> {
> struct device *dev = &pdev->dev;
> @@ -75,6 +110,7 @@ static int bcmbca_nand_probe(struct platform_device *pdev)
>
> soc->ctlrdy_ack = bcmbca_nand_intc_ack;
> soc->ctlrdy_set_enabled = bcmbca_nand_intc_set;
> + soc->read_data_bus = bcmbca_read_data_bus;
>
> return brcmnand_probe(pdev, soc);
> }
> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> index d920e88c7f5b..656be4d73016 100644
> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> @@ -814,6 +814,30 @@ static inline u32 edu_readl(struct brcmnand_controller *ctrl,
> return brcmnand_readl(ctrl->edu_base + offs);
> }
>
> +static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl,
> + void __iomem *flash_cache, u32 *buffer,
> + int fc_words, bool is_param)
I strongly dislike this "is_param" boolean.
When is the data in host endianness? When is it not?
If we think about an exec_op() conversion and drop cmdfunc(), what
would be the discriminant?
> +{
> + struct brcmnand_soc *soc = ctrl->soc;
> + int i;
> +
> + if (soc->read_data_bus) {
> + soc->read_data_bus(soc, flash_cache, buffer, fc_words, is_param);
> + } else {
> + if (!is_param) {
> + for (i = 0; i < fc_words; i++, buffer++)
> + *buffer = brcmnand_read_fc(ctrl, i);
> + } else {
> + for (i = 0; i < fc_words; i++)
> + /*
> + * Flash cache is big endian for parameter pages, at
> + * least on STB SoCs
> + */
> + buffer[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
> + }
> + }
> +}
> +
> static void brcmnand_clear_ecc_addr(struct brcmnand_controller *ctrl)
> {
>
> @@ -1811,20 +1835,11 @@ static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
> native_cmd == CMD_PARAMETER_CHANGE_COL) {
> /* Copy flash cache word-wise */
> u32 *flash_cache = (u32 *)ctrl->flash_cache;
> - int i;
>
> brcmnand_soc_data_bus_prepare(ctrl->soc, true);
>
> - /*
> - * Must cache the FLASH_CACHE now, since changes in
> - * SECTOR_SIZE_1K may invalidate it
> - */
> - for (i = 0; i < FC_WORDS; i++)
> - /*
> - * Flash cache is big endian for parameter pages, at
> - * least on STB SoCs
> - */
> - flash_cache[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, flash_cache,
> + FC_WORDS, true);
>
> brcmnand_soc_data_bus_unprepare(ctrl->soc, true);
>
> @@ -2137,7 +2152,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
> {
> struct brcmnand_host *host = nand_get_controller_data(chip);
> struct brcmnand_controller *ctrl = host->ctrl;
> - int i, j, ret = 0;
> + int i, ret = 0;
>
> brcmnand_clear_ecc_addr(ctrl);
>
> @@ -2150,8 +2165,9 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
> if (likely(buf)) {
> brcmnand_soc_data_bus_prepare(ctrl->soc, false);
>
> - for (j = 0; j < FC_WORDS; j++, buf++)
> - *buf = brcmnand_read_fc(ctrl, j);
> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, buf,
> + FC_WORDS, false);
> + buf += FC_WORDS;
>
> brcmnand_soc_data_bus_unprepare(ctrl->soc, false);
> }
> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.h b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> index f1f93d85f50d..88819bc395f8 100644
> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> @@ -24,6 +24,8 @@ struct brcmnand_soc {
> void (*ctlrdy_set_enabled)(struct brcmnand_soc *soc, bool en);
> void (*prepare_data_bus)(struct brcmnand_soc *soc, bool prepare,
> bool is_param);
> + void (*read_data_bus)(struct brcmnand_soc *soc, void __iomem *flash_cache,
> + u32 *buffer, int fc_words, bool is_param);
> const struct brcmnand_io_ops *ops;
> };
>
Thanks,
Miquèl
Hi Miquel,
On 06/07/2023 01:20 AM, Miquel Raynal wrote:
> Hi William,
>
> [email protected] wrote on Tue, 6 Jun 2023 16:12:50 -0700:
>
>> The BCMBCA broadband SoC integrates the NAND controller differently than
>> STB, iProc and other SoCs. It has different endianness for NAND cache
>> data and ONFI parameter data.
>>
>> Add a SoC read data bus shim for BCMBCA to meet the specific SoC need
>> and performance improvement using the optimized memcpy function on NAND
>> cache memory.
>>
>> Signed-off-by: William Zhang <[email protected]>
>> ---
>>
>> drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c | 36 +++++++++++++++++
>> drivers/mtd/nand/raw/brcmnand/brcmnand.c | 44 ++++++++++++++-------
>> drivers/mtd/nand/raw/brcmnand/brcmnand.h | 2 +
>> 3 files changed, 68 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>> index 7e48b6a0bfa2..899103a62c98 100644
>> --- a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>> +++ b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>> @@ -26,6 +26,18 @@ enum {
>> BCMBCA_CTLRDY = BIT(4),
>> };
>>
>> +#if defined(CONFIG_ARM64)
>> +#define ALIGN_REQ 8
>> +#else
>> +#define ALIGN_REQ 4
>> +#endif
>> +
>> +static inline bool bcmbca_nand_is_buf_aligned(void *flash_cache, void *buffer)
>> +{
>> + return IS_ALIGNED((uintptr_t)buffer, ALIGN_REQ) &&
>> + IS_ALIGNED((uintptr_t)flash_cache, ALIGN_REQ);
>> +}
>> +
>> static bool bcmbca_nand_intc_ack(struct brcmnand_soc *soc)
>> {
>> struct bcmbca_nand_soc *priv =
>> @@ -56,6 +68,29 @@ static void bcmbca_nand_intc_set(struct brcmnand_soc *soc, bool en)
>> brcmnand_writel(val, mmio);
>> }
>>
>> +static void bcmbca_read_data_bus(struct brcmnand_soc *soc,
>> + void __iomem *flash_cache, u32 *buffer,
>> + int fc_words, bool is_param)
>> +{
>> + int i;
>> +
>> + if (!is_param) {
>> + /*
>> + * memcpy can do unaligned aligned access depending on source
>> + * and dest address, which is incompatible with nand cache. Fallback
>> + * to the memcpy for io version
>> + */
>> + if (bcmbca_nand_is_buf_aligned(flash_cache, buffer))
>> + memcpy((void *)buffer, (void *)flash_cache, fc_words * 4);
>> + else
>> + memcpy_fromio((void *)buffer, (void *)flash_cache, fc_words * 4);
>> + } else {
>> + /* Flash cache has same endian as the host for parameter pages */
>> + for (i = 0; i < fc_words; i++, buffer++)
>> + *buffer = __raw_readl(flash_cache + i * 4);
>> + }
>> +}
>> +
>> static int bcmbca_nand_probe(struct platform_device *pdev)
>> {
>> struct device *dev = &pdev->dev;
>> @@ -75,6 +110,7 @@ static int bcmbca_nand_probe(struct platform_device *pdev)
>>
>> soc->ctlrdy_ack = bcmbca_nand_intc_ack;
>> soc->ctlrdy_set_enabled = bcmbca_nand_intc_set;
>> + soc->read_data_bus = bcmbca_read_data_bus;
>>
>> return brcmnand_probe(pdev, soc);
>> }
>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>> index d920e88c7f5b..656be4d73016 100644
>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>> @@ -814,6 +814,30 @@ static inline u32 edu_readl(struct brcmnand_controller *ctrl,
>> return brcmnand_readl(ctrl->edu_base + offs);
>> }
>>
>> +static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl,
>> + void __iomem *flash_cache, u32 *buffer,
>> + int fc_words, bool is_param)
>
> I strongly dislike this "is_param" boolean.
>
> When is the data in host endianness? When is it not?
This is a little bit complicated. We have two types of data read from the
nand cache: one for page reads and the other for parameter and onfi data read
from the controller side. But it depends on how the SoC integrates the nand
cache into the system. In broadband SoCs, both page and parameter data are in
host endianness, but this is not the case for other SoCs.
I am open to suggestions for the is_param function argument, but to factor out
this common code in a more structured way, I don't see any other way around it.
>
> If we think about an exec_op() conversion and drop cmdfunc(), what
> would be the discriminant?
>
If we need to implement exec_op in the future, the data will not come
from the nand cache but from some other low level data register, which may
not be subject to the endianness issue.
>> +{
>> + struct brcmnand_soc *soc = ctrl->soc;
>> + int i;
>> +
>> + if (soc->read_data_bus) {
>> + soc->read_data_bus(soc, flash_cache, buffer, fc_words, is_param);
>> + } else {
>> + if (!is_param) {
>> + for (i = 0; i < fc_words; i++, buffer++)
>> + *buffer = brcmnand_read_fc(ctrl, i);
>> + } else {
>> + for (i = 0; i < fc_words; i++)
>> + /*
>> + * Flash cache is big endian for parameter pages, at
>> + * least on STB SoCs
>> + */
>> + buffer[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
>> + }
>> + }
>> +}
>> +
>> static void brcmnand_clear_ecc_addr(struct brcmnand_controller *ctrl)
>> {
>>
>> @@ -1811,20 +1835,11 @@ static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
>> native_cmd == CMD_PARAMETER_CHANGE_COL) {
>> /* Copy flash cache word-wise */
>> u32 *flash_cache = (u32 *)ctrl->flash_cache;
>> - int i;
>>
>> brcmnand_soc_data_bus_prepare(ctrl->soc, true);
>>
>> - /*
>> - * Must cache the FLASH_CACHE now, since changes in
>> - * SECTOR_SIZE_1K may invalidate it
>> - */
>> - for (i = 0; i < FC_WORDS; i++)
>> - /*
>> - * Flash cache is big endian for parameter pages, at
>> - * least on STB SoCs
>> - */
>> - flash_cache[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, flash_cache,
>> + FC_WORDS, true);
>>
>> brcmnand_soc_data_bus_unprepare(ctrl->soc, true);
>>
>> @@ -2137,7 +2152,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
>> {
>> struct brcmnand_host *host = nand_get_controller_data(chip);
>> struct brcmnand_controller *ctrl = host->ctrl;
>> - int i, j, ret = 0;
>> + int i, ret = 0;
>>
>> brcmnand_clear_ecc_addr(ctrl);
>>
>> @@ -2150,8 +2165,9 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
>> if (likely(buf)) {
>> brcmnand_soc_data_bus_prepare(ctrl->soc, false);
>>
>> - for (j = 0; j < FC_WORDS; j++, buf++)
>> - *buf = brcmnand_read_fc(ctrl, j);
>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, buf,
>> + FC_WORDS, false);
>> + buf += FC_WORDS;
>>
>> brcmnand_soc_data_bus_unprepare(ctrl->soc, false);
>> }
>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.h b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>> index f1f93d85f50d..88819bc395f8 100644
>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>> @@ -24,6 +24,8 @@ struct brcmnand_soc {
>> void (*ctlrdy_set_enabled)(struct brcmnand_soc *soc, bool en);
>> void (*prepare_data_bus)(struct brcmnand_soc *soc, bool prepare,
>> bool is_param);
>> + void (*read_data_bus)(struct brcmnand_soc *soc, void __iomem *flash_cache,
>> + u32 *buffer, int fc_words, bool is_param);
>> const struct brcmnand_io_ops *ops;
>> };
>>
>
>
> Thanks,
> Miquèl
>
Hi Miquel,
On 06/07/2023 01:22 AM, Miquel Raynal wrote:
> Hi William,
>
> [email protected] wrote on Tue, 6 Jun 2023 16:12:50 -0700:
>
>> The BCMBCA broadband SoC integrates the NAND controller differently than
>> STB, iProc and other SoCs. It has different endianness for NAND cache
>> data and ONFI parameter data.
>>
>> Add a SoC read data bus shim for BCMBCA to meet the specific SoC need
>> and performance improvement using the optimized memcpy function on NAND
>> cache memory.
>>
>> Signed-off-by: William Zhang <[email protected]>
>> ---
>>
>> drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c | 36 +++++++++++++++++
>> drivers/mtd/nand/raw/brcmnand/brcmnand.c | 44 ++++++++++++++-------
>> drivers/mtd/nand/raw/brcmnand/brcmnand.h | 2 +
>> 3 files changed, 68 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>> index 7e48b6a0bfa2..899103a62c98 100644
>> --- a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>> +++ b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>> @@ -26,6 +26,18 @@ enum {
>> BCMBCA_CTLRDY = BIT(4),
>> };
>>
>> +#if defined(CONFIG_ARM64)
>> +#define ALIGN_REQ 8
>> +#else
>> +#define ALIGN_REQ 4
>> +#endif
>> +
>> +static inline bool bcmbca_nand_is_buf_aligned(void *flash_cache, void *buffer)
>> +{
>> + return IS_ALIGNED((uintptr_t)buffer, ALIGN_REQ) &&
>> + IS_ALIGNED((uintptr_t)flash_cache, ALIGN_REQ);
>> +}
>> +
>> static bool bcmbca_nand_intc_ack(struct brcmnand_soc *soc)
>> {
>> struct bcmbca_nand_soc *priv =
>> @@ -56,6 +68,29 @@ static void bcmbca_nand_intc_set(struct brcmnand_soc *soc, bool en)
>> brcmnand_writel(val, mmio);
>> }
>>
>> +static void bcmbca_read_data_bus(struct brcmnand_soc *soc,
>> + void __iomem *flash_cache, u32 *buffer,
>> + int fc_words, bool is_param)
>> +{
>> + int i;
>> +
>> + if (!is_param) {
>> + /*
>> + * memcpy can do unaligned aligned access depending on source
>> + * and dest address, which is incompatible with nand cache. Fallback
>> + * to the memcpy for io version
>> + */
>> + if (bcmbca_nand_is_buf_aligned(flash_cache, buffer))
>> + memcpy((void *)buffer, (void *)flash_cache, fc_words * 4);
>> + else
>> + memcpy_fromio((void *)buffer, (void *)flash_cache, fc_words * 4);
>> + } else {
>> + /* Flash cache has same endian as the host for parameter pages */
>> + for (i = 0; i < fc_words; i++, buffer++)
>> + *buffer = __raw_readl(flash_cache + i * 4);
>> + }
>> +}
>> +
>> static int bcmbca_nand_probe(struct platform_device *pdev)
>> {
>> struct device *dev = &pdev->dev;
>> @@ -75,6 +110,7 @@ static int bcmbca_nand_probe(struct platform_device *pdev)
>>
>> soc->ctlrdy_ack = bcmbca_nand_intc_ack;
>> soc->ctlrdy_set_enabled = bcmbca_nand_intc_set;
>> + soc->read_data_bus = bcmbca_read_data_bus;
>>
>> return brcmnand_probe(pdev, soc);
>> }
>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>> index d920e88c7f5b..656be4d73016 100644
>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>> @@ -814,6 +814,30 @@ static inline u32 edu_readl(struct brcmnand_controller *ctrl,
>> return brcmnand_readl(ctrl->edu_base + offs);
>> }
>>
>> +static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl,
>> + void __iomem *flash_cache, u32 *buffer,
>> + int fc_words, bool is_param)
>> +{
>> + struct brcmnand_soc *soc = ctrl->soc;
>> + int i;
>> +
>> + if (soc->read_data_bus) {
>> + soc->read_data_bus(soc, flash_cache, buffer, fc_words, is_param);
>> + } else {
>> + if (!is_param) {
>> + for (i = 0; i < fc_words; i++, buffer++)
>> + *buffer = brcmnand_read_fc(ctrl, i);
>> + } else {
>> + for (i = 0; i < fc_words; i++)
>> + /*
>> + * Flash cache is big endian for parameter pages, at
>> + * least on STB SoCs
>> + */
>> + buffer[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
>> + }
>> + }
>
> Perhaps we could have a single function that is statically assigned at
> probe time instead of a first helper with two conditions which calls in
> one case another hook... This can be simplified I guess.
>
Well, this would need to be done at the SoC-specific implementation level
(bcm<xxx>_nand.c), and each SoC would need to have either a general data bus
read function with an is_param option, or separate data_bus_read_page and
data_bus_read_param functions. I'm not sure how much this can be
simplified... Or we could have a default implementation in brcmnand.c, but
then there is still one condition check. Page reads are done in 512-byte
bursts. One or two condition checks outside of the per-512-byte read loop
do not sound too bad if performance is the concern.
>> +}
>> +
>> static void brcmnand_clear_ecc_addr(struct brcmnand_controller *ctrl)
>> {
>>
>> @@ -1811,20 +1835,11 @@ static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
>> native_cmd == CMD_PARAMETER_CHANGE_COL) {
>> /* Copy flash cache word-wise */
>> u32 *flash_cache = (u32 *)ctrl->flash_cache;
>> - int i;
>>
>> brcmnand_soc_data_bus_prepare(ctrl->soc, true);
>>
>> - /*
>> - * Must cache the FLASH_CACHE now, since changes in
>> - * SECTOR_SIZE_1K may invalidate it
>> - */
>> - for (i = 0; i < FC_WORDS; i++)
>> - /*
>> - * Flash cache is big endian for parameter pages, at
>> - * least on STB SoCs
>> - */
>> - flash_cache[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, flash_cache,
>> + FC_WORDS, true);
>>
>> brcmnand_soc_data_bus_unprepare(ctrl->soc, true);
>>
>> @@ -2137,7 +2152,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
>> {
>> struct brcmnand_host *host = nand_get_controller_data(chip);
>> struct brcmnand_controller *ctrl = host->ctrl;
>> - int i, j, ret = 0;
>> + int i, ret = 0;
>>
>> brcmnand_clear_ecc_addr(ctrl);
>>
>> @@ -2150,8 +2165,9 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
>> if (likely(buf)) {
>> brcmnand_soc_data_bus_prepare(ctrl->soc, false);
>>
>> - for (j = 0; j < FC_WORDS; j++, buf++)
>> - *buf = brcmnand_read_fc(ctrl, j);
>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, buf,
>> + FC_WORDS, false);
>> + buf += FC_WORDS;
>>
>> brcmnand_soc_data_bus_unprepare(ctrl->soc, false);
>> }
>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.h b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>> index f1f93d85f50d..88819bc395f8 100644
>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>> @@ -24,6 +24,8 @@ struct brcmnand_soc {
>> void (*ctlrdy_set_enabled)(struct brcmnand_soc *soc, bool en);
>> void (*prepare_data_bus)(struct brcmnand_soc *soc, bool prepare,
>> bool is_param);
>> + void (*read_data_bus)(struct brcmnand_soc *soc, void __iomem *flash_cache,
>> + u32 *buffer, int fc_words, bool is_param);
>> const struct brcmnand_io_ops *ops;
>> };
>>
>
>
> Thanks,
> Miquèl
>
Hi William,
[email protected] wrote on Wed, 7 Jun 2023 13:12:02 -0700:
> Hi Miquel,
>
> On 06/07/2023 01:20 AM, Miquel Raynal wrote:
> > Hi William,
> >
> > [email protected] wrote on Tue, 6 Jun 2023 16:12:50 -0700:
> >
> >> The BCMBCA broadband SoC integrates the NAND controller differently than
> >> STB, iProc and other SoCs. It has different endianness for NAND cache
> >> data and ONFI parameter data.
> >>
> >> Add a SoC read data bus shim for BCMBCA to meet the specific SoC need
> >> and performance improvement using the optimized memcpy function on NAND
> >> cache memory.
> >>
> >> Signed-off-by: William Zhang <[email protected]>
> >> ---
> >>
> >> drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c | 36 +++++++++++++++++
> >> drivers/mtd/nand/raw/brcmnand/brcmnand.c | 44 ++++++++++++++-------
> >> drivers/mtd/nand/raw/brcmnand/brcmnand.h | 2 +
> >> 3 files changed, 68 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> >> index 7e48b6a0bfa2..899103a62c98 100644
> >> --- a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> >> +++ b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> >> @@ -26,6 +26,18 @@ enum {
> >> BCMBCA_CTLRDY = BIT(4),
> >> };
> >> >> +#if defined(CONFIG_ARM64)
> >> +#define ALIGN_REQ 8
> >> +#else
> >> +#define ALIGN_REQ 4
> >> +#endif
> >> +
> >> +static inline bool bcmbca_nand_is_buf_aligned(void *flash_cache, void *buffer)
> >> +{
> >> + return IS_ALIGNED((uintptr_t)buffer, ALIGN_REQ) &&
> >> + IS_ALIGNED((uintptr_t)flash_cache, ALIGN_REQ);
> >> +}
> >> +
> >> static bool bcmbca_nand_intc_ack(struct brcmnand_soc *soc)
> >> {
> >> struct bcmbca_nand_soc *priv =
> >> @@ -56,6 +68,29 @@ static void bcmbca_nand_intc_set(struct brcmnand_soc *soc, bool en)
> >> brcmnand_writel(val, mmio);
> >> }
> >> >> +static void bcmbca_read_data_bus(struct brcmnand_soc *soc,
> >> + void __iomem *flash_cache, u32 *buffer,
> >> + int fc_words, bool is_param)
> >> +{
> >> + int i;
> >> +
> >> + if (!is_param) {
> >> + /*
> >> + * memcpy can do unaligned aligned access depending on source
> >> + * and dest address, which is incompatible with nand cache. Fallback
> >> + * to the memcpy for io version
> >> + */
> >> + if (bcmbca_nand_is_buf_aligned(flash_cache, buffer))
> >> + memcpy((void *)buffer, (void *)flash_cache, fc_words * 4);
> >> + else
> >> + memcpy_fromio((void *)buffer, (void *)flash_cache, fc_words * 4);
> >> + } else {
> >> + /* Flash cache has same endian as the host for parameter pages */
> >> + for (i = 0; i < fc_words; i++, buffer++)
> >> + *buffer = __raw_readl(flash_cache + i * 4);
> >> + }
> >> +}
> >> +
> >> static int bcmbca_nand_probe(struct platform_device *pdev)
> >> {
> >> struct device *dev = &pdev->dev;
> >> @@ -75,6 +110,7 @@ static int bcmbca_nand_probe(struct platform_device *pdev)
> >> >> soc->ctlrdy_ack = bcmbca_nand_intc_ack;
> >> soc->ctlrdy_set_enabled = bcmbca_nand_intc_set;
> >> + soc->read_data_bus = bcmbca_read_data_bus;
> >> >> return brcmnand_probe(pdev, soc);
> >> }
> >> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> >> index d920e88c7f5b..656be4d73016 100644
> >> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> >> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> >> @@ -814,6 +814,30 @@ static inline u32 edu_readl(struct brcmnand_controller *ctrl,
> >> return brcmnand_readl(ctrl->edu_base + offs);
> >> }
> >> >> +static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl,
> >> + void __iomem *flash_cache, u32 *buffer,
> >> + int fc_words, bool is_param)
> >
> > I strongly dislike this "is_param" boolean.
> >
> > When is the data in host endianness? When is it not?
> This is little bit complicated. We have two type data read from nand cache. One for page read and the other for parameter and onfi data read from the controller side. But it depends on how SoC integrate the nand cache to system. In broadband SoC, both page and parameter data are in host endianess but other SoCs is not the same.
>
> I am open to suggestion for is_param function argument but to factor out this common code in more structured way, I don't see other way around.
Alright, so this is SoC dependent, very well -> a (sub)compatible per
SoC + platform data associated to it with the right function.
> > If we think about an exec_op() conversion and drop cmdfunc(), what
> > would be the discriminant?
> >
> If we need to implement exec_op in the future, the data is not coming from nand cache but some other low level data register which may not subject to the endianess issue.
Can't you use the same cache all the time here as well then? And avoid
the need for this overly complex logic?
>
> >> +{
> >> + struct brcmnand_soc *soc = ctrl->soc;
> >> + int i;
> >> +
> >> + if (soc->read_data_bus) {
> >> + soc->read_data_bus(soc, flash_cache, buffer, fc_words, is_param);
> >> + } else {
> >> + if (!is_param) {
> >> + for (i = 0; i < fc_words; i++, buffer++)
> >> + *buffer = brcmnand_read_fc(ctrl, i);
> >> + } else {
> >> + for (i = 0; i < fc_words; i++)
> >> + /*
> >> + * Flash cache is big endian for parameter pages, at
> >> + * least on STB SoCs
> >> + */
> >> + buffer[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
> >> + }
> >> + }
> >> +}
> >> +
> >> static void brcmnand_clear_ecc_addr(struct brcmnand_controller *ctrl)
> >> {
> >> >> @@ -1811,20 +1835,11 @@ static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
> >> native_cmd == CMD_PARAMETER_CHANGE_COL) {
> >> /* Copy flash cache word-wise */
> >> u32 *flash_cache = (u32 *)ctrl->flash_cache;
> >> - int i;
> >> >> brcmnand_soc_data_bus_prepare(ctrl->soc, true);
> >> >> - /*
> >> - * Must cache the FLASH_CACHE now, since changes in
> >> - * SECTOR_SIZE_1K may invalidate it
> >> - */
> >> - for (i = 0; i < FC_WORDS; i++)
> >> - /*
> >> - * Flash cache is big endian for parameter pages, at
> >> - * least on STB SoCs
> >> - */
> >> - flash_cache[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
> >> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, flash_cache,
> >> + FC_WORDS, true);
> >> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, true);
> >> >> @@ -2137,7 +2152,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
> >> {
> >> struct brcmnand_host *host = nand_get_controller_data(chip);
> >> struct brcmnand_controller *ctrl = host->ctrl;
> >> - int i, j, ret = 0;
> >> + int i, ret = 0;
> >> >> brcmnand_clear_ecc_addr(ctrl);
> >> >> @@ -2150,8 +2165,9 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
> >> if (likely(buf)) {
> >> brcmnand_soc_data_bus_prepare(ctrl->soc, false);
> >> >> - for (j = 0; j < FC_WORDS; j++, buf++)
> >> - *buf = brcmnand_read_fc(ctrl, j);
> >> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, buf,
> >> + FC_WORDS, false);
> >> + buf += FC_WORDS;
> >> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, false);
> >> }
> >> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.h b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> >> index f1f93d85f50d..88819bc395f8 100644
> >> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> >> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> >> @@ -24,6 +24,8 @@ struct brcmnand_soc {
> >> void (*ctlrdy_set_enabled)(struct brcmnand_soc *soc, bool en);
> >> void (*prepare_data_bus)(struct brcmnand_soc *soc, bool prepare,
> >> bool is_param);
> >> + void (*read_data_bus)(struct brcmnand_soc *soc, void __iomem *flash_cache,
> >> + u32 *buffer, int fc_words, bool is_param);
> >> const struct brcmnand_io_ops *ops;
> >> };
> >> > >
> > Thanks,
> > Miquèl
> >
Thanks,
Miquèl
Hi William,
[email protected] wrote on Wed, 7 Jun 2023 13:24:23 -0700:
> Hi Miquel,
>
> On 06/07/2023 01:22 AM, Miquel Raynal wrote:
> > Hi William,
> >
> > [email protected] wrote on Tue, 6 Jun 2023 16:12:50 -0700:
> >
> >> The BCMBCA broadband SoC integrates the NAND controller differently than
> >> STB, iProc and other SoCs. It has different endianness for NAND cache
> >> data and ONFI parameter data.
> >>
> >> Add a SoC read data bus shim for BCMBCA to meet the specific SoC need
> >> and performance improvement using the optimized memcpy function on NAND
> >> cache memory.
> >>
> >> Signed-off-by: William Zhang <[email protected]>
> >> ---
> >>
> >> drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c | 36 +++++++++++++++++
> >> drivers/mtd/nand/raw/brcmnand/brcmnand.c | 44 ++++++++++++++-------
> >> drivers/mtd/nand/raw/brcmnand/brcmnand.h | 2 +
> >> 3 files changed, 68 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> >> index 7e48b6a0bfa2..899103a62c98 100644
> >> --- a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> >> +++ b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> >> @@ -26,6 +26,18 @@ enum {
> >> BCMBCA_CTLRDY = BIT(4),
> >> };
> >> >> +#if defined(CONFIG_ARM64)
> >> +#define ALIGN_REQ 8
> >> +#else
> >> +#define ALIGN_REQ 4
> >> +#endif
> >> +
> >> +static inline bool bcmbca_nand_is_buf_aligned(void *flash_cache, void *buffer)
> >> +{
> >> + return IS_ALIGNED((uintptr_t)buffer, ALIGN_REQ) &&
> >> + IS_ALIGNED((uintptr_t)flash_cache, ALIGN_REQ);
> >> +}
> >> +
> >> static bool bcmbca_nand_intc_ack(struct brcmnand_soc *soc)
> >> {
> >> struct bcmbca_nand_soc *priv =
> >> @@ -56,6 +68,29 @@ static void bcmbca_nand_intc_set(struct brcmnand_soc *soc, bool en)
> >> brcmnand_writel(val, mmio);
> >> }
> >> >> +static void bcmbca_read_data_bus(struct brcmnand_soc *soc,
> >> + void __iomem *flash_cache, u32 *buffer,
> >> + int fc_words, bool is_param)
> >> +{
> >> + int i;
> >> +
> >> + if (!is_param) {
> >> + /*
> >> + * memcpy can do unaligned aligned access depending on source
> >> + * and dest address, which is incompatible with nand cache. Fallback
> >> + * to the memcpy for io version
> >> + */
> >> + if (bcmbca_nand_is_buf_aligned(flash_cache, buffer))
> >> + memcpy((void *)buffer, (void *)flash_cache, fc_words * 4);
> >> + else
> >> + memcpy_fromio((void *)buffer, (void *)flash_cache, fc_words * 4);
> >> + } else {
> >> + /* Flash cache has same endian as the host for parameter pages */
> >> + for (i = 0; i < fc_words; i++, buffer++)
> >> + *buffer = __raw_readl(flash_cache + i * 4);
> >> + }
> >> +}
> >> +
> >> static int bcmbca_nand_probe(struct platform_device *pdev)
> >> {
> >> struct device *dev = &pdev->dev;
> >> @@ -75,6 +110,7 @@ static int bcmbca_nand_probe(struct platform_device *pdev)
> >> >> soc->ctlrdy_ack = bcmbca_nand_intc_ack;
> >> soc->ctlrdy_set_enabled = bcmbca_nand_intc_set;
> >> + soc->read_data_bus = bcmbca_read_data_bus;
> >> >> return brcmnand_probe(pdev, soc);
> >> }
> >> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> >> index d920e88c7f5b..656be4d73016 100644
> >> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> >> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> >> @@ -814,6 +814,30 @@ static inline u32 edu_readl(struct brcmnand_controller *ctrl,
> >> return brcmnand_readl(ctrl->edu_base + offs);
> >> }
> >> >> +static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl,
> >> + void __iomem *flash_cache, u32 *buffer,
> >> + int fc_words, bool is_param)
> >> +{
> >> + struct brcmnand_soc *soc = ctrl->soc;
> >> + int i;
> >> +
> >> + if (soc->read_data_bus) {
> >> + soc->read_data_bus(soc, flash_cache, buffer, fc_words, is_param);
> >> + } else {
> >> + if (!is_param) {
> >> + for (i = 0; i < fc_words; i++, buffer++)
> >> + *buffer = brcmnand_read_fc(ctrl, i);
> >> + } else {
> >> + for (i = 0; i < fc_words; i++)
> >> + /*
> >> + * Flash cache is big endian for parameter pages, at
> >> + * least on STB SoCs
> >> + */
> >> + buffer[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
> >> + }
> >> + }
> >
> > Perhaps we could have a single function that is statically assigned at
> > probe time instead of a first helper with two conditions which calls in
> > one case another hook... This can be simplified I guess.
> >
> Well this will need to be done at the SoC specific implementation level (bcm<xxx>_nand.c) and each SoC will need to have either general data bus read func with is_param option or data_bus_read_page, data_bus_read_param.
You told me in case we would use exec_op we could avoid the param
cache. If that's true then the whole support can be simplified.
> Not sure how much this can be simplified... Or we have default
> implementation in brcmnand.c but then there is one condition check
> too. Page read is done at 512 bytes burst. One or two conditions
> check outside of the per 512 bytes read loop does not sounds too bad
> if performance is concern.
It is unreadable. That is my main concern.
>
> >> +}
> >> +
> >> static void brcmnand_clear_ecc_addr(struct brcmnand_controller *ctrl)
> >> {
> >> >> @@ -1811,20 +1835,11 @@ static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
> >> native_cmd == CMD_PARAMETER_CHANGE_COL) {
> >> /* Copy flash cache word-wise */
> >> u32 *flash_cache = (u32 *)ctrl->flash_cache;
> >> - int i;
> >> >> brcmnand_soc_data_bus_prepare(ctrl->soc, true);
> >> >> - /*
> >> - * Must cache the FLASH_CACHE now, since changes in
> >> - * SECTOR_SIZE_1K may invalidate it
> >> - */
> >> - for (i = 0; i < FC_WORDS; i++)
> >> - /*
> >> - * Flash cache is big endian for parameter pages, at
> >> - * least on STB SoCs
> >> - */
> >> - flash_cache[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
> >> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, flash_cache,
> >> + FC_WORDS, true);
> >> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, true);
> >> >> @@ -2137,7 +2152,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
> >> {
> >> struct brcmnand_host *host = nand_get_controller_data(chip);
> >> struct brcmnand_controller *ctrl = host->ctrl;
> >> - int i, j, ret = 0;
> >> + int i, ret = 0;
> >> >> brcmnand_clear_ecc_addr(ctrl);
> >> >> @@ -2150,8 +2165,9 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
> >> if (likely(buf)) {
> >> brcmnand_soc_data_bus_prepare(ctrl->soc, false);
> >> >> - for (j = 0; j < FC_WORDS; j++, buf++)
> >> - *buf = brcmnand_read_fc(ctrl, j);
> >> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, buf,
> >> + FC_WORDS, false);
> >> + buf += FC_WORDS;
> >> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, false);
> >> }
> >> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.h b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> >> index f1f93d85f50d..88819bc395f8 100644
> >> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> >> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> >> @@ -24,6 +24,8 @@ struct brcmnand_soc {
> >> void (*ctlrdy_set_enabled)(struct brcmnand_soc *soc, bool en);
> >> void (*prepare_data_bus)(struct brcmnand_soc *soc, bool prepare,
> >> bool is_param);
> >> + void (*read_data_bus)(struct brcmnand_soc *soc, void __iomem *flash_cache,
> >> + u32 *buffer, int fc_words, bool is_param);
> >> const struct brcmnand_io_ops *ops;
> >> };
> >> > >
> > Thanks,
> > Miquèl
> >
Thanks,
Miquèl
On 06/07/2023 11:15 PM, Miquel Raynal wrote:
> Hi William,
>
> [email protected] wrote on Wed, 7 Jun 2023 13:12:02 -0700:
>
>> Hi Miquel,
>>
>> On 06/07/2023 01:20 AM, Miquel Raynal wrote:
>>> Hi William,
>>>
>>> [email protected] wrote on Tue, 6 Jun 2023 16:12:50 -0700:
>>>
>>>> The BCMBCA broadband SoC integrates the NAND controller differently than
>>>> STB, iProc and other SoCs. It has different endianness for NAND cache
>>>> data and ONFI parameter data.
>>>>
>>>> Add a SoC read data bus shim for BCMBCA to meet the specific SoC need
>>>> and performance improvement using the optimized memcpy function on NAND
>>>> cache memory.
>>>>
>>>> Signed-off-by: William Zhang <[email protected]>
>>>> ---
>>>>
>>>> drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c | 36 +++++++++++++++++
>>>> drivers/mtd/nand/raw/brcmnand/brcmnand.c | 44 ++++++++++++++-------
>>>> drivers/mtd/nand/raw/brcmnand/brcmnand.h | 2 +
>>>> 3 files changed, 68 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>>>> index 7e48b6a0bfa2..899103a62c98 100644
>>>> --- a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>>>> +++ b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>>>> @@ -26,6 +26,18 @@ enum {
>>>> BCMBCA_CTLRDY = BIT(4),
>>>> };
>>>> >> +#if defined(CONFIG_ARM64)
>>>> +#define ALIGN_REQ 8
>>>> +#else
>>>> +#define ALIGN_REQ 4
>>>> +#endif
>>>> +
>>>> +static inline bool bcmbca_nand_is_buf_aligned(void *flash_cache, void *buffer)
>>>> +{
>>>> + return IS_ALIGNED((uintptr_t)buffer, ALIGN_REQ) &&
>>>> + IS_ALIGNED((uintptr_t)flash_cache, ALIGN_REQ);
>>>> +}
>>>> +
>>>> static bool bcmbca_nand_intc_ack(struct brcmnand_soc *soc)
>>>> {
>>>> struct bcmbca_nand_soc *priv =
>>>> @@ -56,6 +68,29 @@ static void bcmbca_nand_intc_set(struct brcmnand_soc *soc, bool en)
>>>> brcmnand_writel(val, mmio);
>>>> }
>>>> >> +static void bcmbca_read_data_bus(struct brcmnand_soc *soc,
>>>> + void __iomem *flash_cache, u32 *buffer,
>>>> + int fc_words, bool is_param)
>>>> +{
>>>> + int i;
>>>> +
>>>> + if (!is_param) {
>>>> + /*
>>>> + * memcpy can do unaligned aligned access depending on source
>>>> + * and dest address, which is incompatible with nand cache. Fallback
>>>> + * to the memcpy for io version
>>>> + */
>>>> + if (bcmbca_nand_is_buf_aligned(flash_cache, buffer))
>>>> + memcpy((void *)buffer, (void *)flash_cache, fc_words * 4);
>>>> + else
>>>> + memcpy_fromio((void *)buffer, (void *)flash_cache, fc_words * 4);
>>>> + } else {
>>>> + /* Flash cache has same endian as the host for parameter pages */
>>>> + for (i = 0; i < fc_words; i++, buffer++)
>>>> + *buffer = __raw_readl(flash_cache + i * 4);
>>>> + }
>>>> +}
>>>> +
>>>> static int bcmbca_nand_probe(struct platform_device *pdev)
>>>> {
>>>> struct device *dev = &pdev->dev;
>>>> @@ -75,6 +110,7 @@ static int bcmbca_nand_probe(struct platform_device *pdev)
>>>> >> soc->ctlrdy_ack = bcmbca_nand_intc_ack;
>>>> soc->ctlrdy_set_enabled = bcmbca_nand_intc_set;
>>>> + soc->read_data_bus = bcmbca_read_data_bus;
>>>> >> return brcmnand_probe(pdev, soc);
>>>> }
>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>>>> index d920e88c7f5b..656be4d73016 100644
>>>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>>>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>>>> @@ -814,6 +814,30 @@ static inline u32 edu_readl(struct brcmnand_controller *ctrl,
>>>> return brcmnand_readl(ctrl->edu_base + offs);
>>>> }
>>>> >> +static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl,
>>>> + void __iomem *flash_cache, u32 *buffer,
>>>> + int fc_words, bool is_param)
>>>
>>> I strongly dislike this "is_param" boolean.
>>>
>>> When is the data in host endianness? When is it not?
>> This is little bit complicated. We have two type data read from nand cache. One for page read and the other for parameter and onfi data read from the controller side. But it depends on how SoC integrate the nand cache to system. In broadband SoC, both page and parameter data are in host endianess but other SoCs is not the same.
>>
>> I am open to suggestion for is_param function argument but to factor out this common code in more structured way, I don't see other way around.
>
> Alright, so this is SoC dependent, very well -> a (sub)compatible per
> SoC + platform data associated to it with the right function.
>
Right, we have a per-SoC compatible and can have a per-SoC implementation, but
I prefer to have a default implementation in brcmnand.c because
right now only the bcmbca SoC needs different handling. The other four
implementations are the same.
To make the code a little more readable and less complicated, I am
thinking of splitting brcmnand_read_data_bus into
brcmnand_read_page_data and brcmnand_read_param_data as the defaults in
brcmnand.c, with bcmbca overriding them. Would that be okay with you?
>>> If we think about an exec_op() conversion and drop cmdfunc(), what
>>> would be the discriminant?
>>>
>> If we need to implement exec_op in the future, the data is not coming from nand cache but some other low level data register which may not subject to the endianess issue.
>
> Can't you use the same cache all the time here as well then? And avoid
> the need for this overly complex logic?
>
Unfortunately, exec_op will not use the NAND cache for parameter data reads;
it uses some other low-level data register instead. This is dictated by the
controller.
>>
>>>> +{
>>>> + struct brcmnand_soc *soc = ctrl->soc;
>>>> + int i;
>>>> +
>>>> + if (soc->read_data_bus) {
>>>> + soc->read_data_bus(soc, flash_cache, buffer, fc_words, is_param);
>>>> + } else {
>>>> + if (!is_param) {
>>>> + for (i = 0; i < fc_words; i++, buffer++)
>>>> + *buffer = brcmnand_read_fc(ctrl, i);
>>>> + } else {
>>>> + for (i = 0; i < fc_words; i++)
>>>> + /*
>>>> + * Flash cache is big endian for parameter pages, at
>>>> + * least on STB SoCs
>>>> + */
>>>> + buffer[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
>>>> + }
>>>> + }
>>>> +}
>>>> +
>>>> static void brcmnand_clear_ecc_addr(struct brcmnand_controller *ctrl)
>>>> {
>>>> >> @@ -1811,20 +1835,11 @@ static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
>>>> native_cmd == CMD_PARAMETER_CHANGE_COL) {
>>>> /* Copy flash cache word-wise */
>>>> u32 *flash_cache = (u32 *)ctrl->flash_cache;
>>>> - int i;
>>>> >> brcmnand_soc_data_bus_prepare(ctrl->soc, true);
>>>> >> - /*
>>>> - * Must cache the FLASH_CACHE now, since changes in
>>>> - * SECTOR_SIZE_1K may invalidate it
>>>> - */
>>>> - for (i = 0; i < FC_WORDS; i++)
>>>> - /*
>>>> - * Flash cache is big endian for parameter pages, at
>>>> - * least on STB SoCs
>>>> - */
>>>> - flash_cache[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
>>>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, flash_cache,
>>>> + FC_WORDS, true);
>>>> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, true);
>>>> >> @@ -2137,7 +2152,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
>>>> {
>>>> struct brcmnand_host *host = nand_get_controller_data(chip);
>>>> struct brcmnand_controller *ctrl = host->ctrl;
>>>> - int i, j, ret = 0;
>>>> + int i, ret = 0;
>>>> >> brcmnand_clear_ecc_addr(ctrl);
>>>> >> @@ -2150,8 +2165,9 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
>>>> if (likely(buf)) {
>>>> brcmnand_soc_data_bus_prepare(ctrl->soc, false);
>>>> >> - for (j = 0; j < FC_WORDS; j++, buf++)
>>>> - *buf = brcmnand_read_fc(ctrl, j);
>>>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, buf,
>>>> + FC_WORDS, false);
>>>> + buf += FC_WORDS;
>>>> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, false);
>>>> }
>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.h b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>>>> index f1f93d85f50d..88819bc395f8 100644
>>>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>>>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>>>> @@ -24,6 +24,8 @@ struct brcmnand_soc {
>>>> void (*ctlrdy_set_enabled)(struct brcmnand_soc *soc, bool en);
>>>> void (*prepare_data_bus)(struct brcmnand_soc *soc, bool prepare,
>>>> bool is_param);
>>>> + void (*read_data_bus)(struct brcmnand_soc *soc, void __iomem *flash_cache,
>>>> + u32 *buffer, int fc_words, bool is_param);
>>>> const struct brcmnand_io_ops *ops;
>>>> };
>>>> > >
>>> Thanks,
>>> Miquèl
>>>
>
>
> Thanks,
> Miquèl
>
On 06/07/2023 11:18 PM, Miquel Raynal wrote:
> Hi William,
>
> [email protected] wrote on Wed, 7 Jun 2023 13:24:23 -0700:
>
>> Hi Miquel,
>>
>> On 06/07/2023 01:22 AM, Miquel Raynal wrote:
>>> Hi William,
>>>
>>> [email protected] wrote on Tue, 6 Jun 2023 16:12:50 -0700:
>>>
>>>> The BCMBCA broadband SoC integrates the NAND controller differently than
>>>> STB, iProc and other SoCs. It has different endianness for NAND cache
>>>> data and ONFI parameter data.
>>>>
>>>> Add a SoC read data bus shim for BCMBCA to meet the specific SoC need
>>>> and performance improvement using the optimized memcpy function on NAND
>>>> cache memory.
>>>>
>>>> Signed-off-by: William Zhang <[email protected]>
>>>> ---
>>>>
>>>> drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c | 36 +++++++++++++++++
>>>> drivers/mtd/nand/raw/brcmnand/brcmnand.c | 44 ++++++++++++++-------
>>>> drivers/mtd/nand/raw/brcmnand/brcmnand.h | 2 +
>>>> 3 files changed, 68 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>>>> index 7e48b6a0bfa2..899103a62c98 100644
>>>> --- a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>>>> +++ b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>>>> @@ -26,6 +26,18 @@ enum {
>>>> BCMBCA_CTLRDY = BIT(4),
>>>> };
>>>> >> +#if defined(CONFIG_ARM64)
>>>> +#define ALIGN_REQ 8
>>>> +#else
>>>> +#define ALIGN_REQ 4
>>>> +#endif
>>>> +
>>>> +static inline bool bcmbca_nand_is_buf_aligned(void *flash_cache, void *buffer)
>>>> +{
>>>> + return IS_ALIGNED((uintptr_t)buffer, ALIGN_REQ) &&
>>>> + IS_ALIGNED((uintptr_t)flash_cache, ALIGN_REQ);
>>>> +}
>>>> +
>>>> static bool bcmbca_nand_intc_ack(struct brcmnand_soc *soc)
>>>> {
>>>> struct bcmbca_nand_soc *priv =
>>>> @@ -56,6 +68,29 @@ static void bcmbca_nand_intc_set(struct brcmnand_soc *soc, bool en)
>>>> brcmnand_writel(val, mmio);
>>>> }
>>>> >> +static void bcmbca_read_data_bus(struct brcmnand_soc *soc,
>>>> + void __iomem *flash_cache, u32 *buffer,
>>>> + int fc_words, bool is_param)
>>>> +{
>>>> + int i;
>>>> +
>>>> + if (!is_param) {
>>>> + /*
>>>> + * memcpy can do unaligned aligned access depending on source
>>>> + * and dest address, which is incompatible with nand cache. Fallback
>>>> + * to the memcpy for io version
>>>> + */
>>>> + if (bcmbca_nand_is_buf_aligned(flash_cache, buffer))
>>>> + memcpy((void *)buffer, (void *)flash_cache, fc_words * 4);
>>>> + else
>>>> + memcpy_fromio((void *)buffer, (void *)flash_cache, fc_words * 4);
>>>> + } else {
>>>> + /* Flash cache has same endian as the host for parameter pages */
>>>> + for (i = 0; i < fc_words; i++, buffer++)
>>>> + *buffer = __raw_readl(flash_cache + i * 4);
>>>> + }
>>>> +}
>>>> +
>>>> static int bcmbca_nand_probe(struct platform_device *pdev)
>>>> {
>>>> struct device *dev = &pdev->dev;
>>>> @@ -75,6 +110,7 @@ static int bcmbca_nand_probe(struct platform_device *pdev)
>>>> >> soc->ctlrdy_ack = bcmbca_nand_intc_ack;
>>>> soc->ctlrdy_set_enabled = bcmbca_nand_intc_set;
>>>> + soc->read_data_bus = bcmbca_read_data_bus;
>>>> >> return brcmnand_probe(pdev, soc);
>>>> }
>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>>>> index d920e88c7f5b..656be4d73016 100644
>>>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>>>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>>>> @@ -814,6 +814,30 @@ static inline u32 edu_readl(struct brcmnand_controller *ctrl,
>>>> return brcmnand_readl(ctrl->edu_base + offs);
>>>> }
>>>> >> +static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl,
>>>> + void __iomem *flash_cache, u32 *buffer,
>>>> + int fc_words, bool is_param)
>>>> +{
>>>> + struct brcmnand_soc *soc = ctrl->soc;
>>>> + int i;
>>>> +
>>>> + if (soc->read_data_bus) {
>>>> + soc->read_data_bus(soc, flash_cache, buffer, fc_words, is_param);
>>>> + } else {
>>>> + if (!is_param) {
>>>> + for (i = 0; i < fc_words; i++, buffer++)
>>>> + *buffer = brcmnand_read_fc(ctrl, i);
>>>> + } else {
>>>> + for (i = 0; i < fc_words; i++)
>>>> + /*
>>>> + * Flash cache is big endian for parameter pages, at
>>>> + * least on STB SoCs
>>>> + */
>>>> + buffer[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
>>>> + }
>>>> + }
>>>
>>> Perhaps we could have a single function that is statically assigned at
>>> probe time instead of a first helper with two conditions which calls in
>>> one case another hook... This can be simplified I guess.
>>>
>> Well this will need to be done at the SoC specific implementation level (bcm<xxx>_nand.c) and each SoC will need to have either general data bus read func with is_param option or data_bus_read_page, data_bus_read_param.
>
> You told me in case we would use exec_op we could avoid the param
> cache. If that's true then the whole support can be simplified.
>
Correct, we may be able to unify the parameter data read, but exec_op is a
long shot and we are not fully ready for that yet. It also depends on whether
the low-level data register has an endianness difference for the parameter
data between different SoCs.
So I would like to push the current implementation, and we can explore
the exec_op option later, which will be a much bigger and completely different
implementation.
>> Not sure how much this can be simplified... Or we have default
>> implementation in brcmnand.c but then there is one condition check
>> too. Page read is done at 512 bytes burst. One or two conditions
>> check outside of the per 512 bytes read loop does not sounds too bad
>> if performance is concern.
>
> It is unreadable. That is my main concern.
>
>>
>>>> +}
>>>> +
>>>> static void brcmnand_clear_ecc_addr(struct brcmnand_controller *ctrl)
>>>> {
>>>> >> @@ -1811,20 +1835,11 @@ static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
>>>> native_cmd == CMD_PARAMETER_CHANGE_COL) {
>>>> /* Copy flash cache word-wise */
>>>> u32 *flash_cache = (u32 *)ctrl->flash_cache;
>>>> - int i;
>>>> >> brcmnand_soc_data_bus_prepare(ctrl->soc, true);
>>>> >> - /*
>>>> - * Must cache the FLASH_CACHE now, since changes in
>>>> - * SECTOR_SIZE_1K may invalidate it
>>>> - */
>>>> - for (i = 0; i < FC_WORDS; i++)
>>>> - /*
>>>> - * Flash cache is big endian for parameter pages, at
>>>> - * least on STB SoCs
>>>> - */
>>>> - flash_cache[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
>>>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, flash_cache,
>>>> + FC_WORDS, true);
>>>> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, true);
>>>> >> @@ -2137,7 +2152,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
>>>> {
>>>> struct brcmnand_host *host = nand_get_controller_data(chip);
>>>> struct brcmnand_controller *ctrl = host->ctrl;
>>>> - int i, j, ret = 0;
>>>> + int i, ret = 0;
>>>> >> brcmnand_clear_ecc_addr(ctrl);
>>>> >> @@ -2150,8 +2165,9 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
>>>> if (likely(buf)) {
>>>> brcmnand_soc_data_bus_prepare(ctrl->soc, false);
>>>> >> - for (j = 0; j < FC_WORDS; j++, buf++)
>>>> - *buf = brcmnand_read_fc(ctrl, j);
>>>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, buf,
>>>> + FC_WORDS, false);
>>>> + buf += FC_WORDS;
>>>> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, false);
>>>> }
>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.h b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>>>> index f1f93d85f50d..88819bc395f8 100644
>>>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>>>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>>>> @@ -24,6 +24,8 @@ struct brcmnand_soc {
>>>> void (*ctlrdy_set_enabled)(struct brcmnand_soc *soc, bool en);
>>>> void (*prepare_data_bus)(struct brcmnand_soc *soc, bool prepare,
>>>> bool is_param);
>>>> + void (*read_data_bus)(struct brcmnand_soc *soc, void __iomem *flash_cache,
>>>> + u32 *buffer, int fc_words, bool is_param);
>>>> const struct brcmnand_io_ops *ops;
>>>> };
>>>> > >
>>> Thanks,
>>> Miquèl
>>>
>
>
> Thanks,
> Miquèl
>
Hi William,
[email protected] wrote on Thu, 8 Jun 2023 12:10:06 -0700:
> On 06/07/2023 11:18 PM, Miquel Raynal wrote:
> > Hi William,
> >
> > [email protected] wrote on Wed, 7 Jun 2023 13:24:23 -0700:
> >
> >> Hi Miquel,
> >>
> >> On 06/07/2023 01:22 AM, Miquel Raynal wrote:
> >>> Hi William,
> >>>
> >>> [email protected] wrote on Tue, 6 Jun 2023 16:12:50 -0700:
> >>> >>>> The BCMBCA broadband SoC integrates the NAND controller differently than
> >>>> STB, iProc and other SoCs. It has different endianness for NAND cache
> >>>> data and ONFI parameter data.
> >>>>
> >>>> Add a SoC read data bus shim for BCMBCA to meet the specific SoC need
> >>>> and performance improvement using the optimized memcpy function on NAND
> >>>> cache memory.
> >>>>
> >>>> Signed-off-by: William Zhang <[email protected]>
> >>>> ---
> >>>>
> >>>> drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c | 36 +++++++++++++++++
> >>>> drivers/mtd/nand/raw/brcmnand/brcmnand.c | 44 ++++++++++++++-------
> >>>> drivers/mtd/nand/raw/brcmnand/brcmnand.h | 2 +
> >>>> 3 files changed, 68 insertions(+), 14 deletions(-)
> >>>>
> >>>> diff --git a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> >>>> index 7e48b6a0bfa2..899103a62c98 100644
> >>>> --- a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> >>>> +++ b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> >>>> @@ -26,6 +26,18 @@ enum {
> >>>> BCMBCA_CTLRDY = BIT(4),
> >>>> };
> >>>> >> +#if defined(CONFIG_ARM64)
> >>>> +#define ALIGN_REQ 8
> >>>> +#else
> >>>> +#define ALIGN_REQ 4
> >>>> +#endif
> >>>> +
> >>>> +static inline bool bcmbca_nand_is_buf_aligned(void *flash_cache, void *buffer)
> >>>> +{
> >>>> + return IS_ALIGNED((uintptr_t)buffer, ALIGN_REQ) &&
> >>>> + IS_ALIGNED((uintptr_t)flash_cache, ALIGN_REQ);
> >>>> +}
> >>>> +
> >>>> static bool bcmbca_nand_intc_ack(struct brcmnand_soc *soc)
> >>>> {
> >>>> struct bcmbca_nand_soc *priv =
> >>>> @@ -56,6 +68,29 @@ static void bcmbca_nand_intc_set(struct brcmnand_soc *soc, bool en)
> >>>> brcmnand_writel(val, mmio);
> >>>> }
> >>>> >> +static void bcmbca_read_data_bus(struct brcmnand_soc *soc,
> >>>> + void __iomem *flash_cache, u32 *buffer,
> >>>> + int fc_words, bool is_param)
> >>>> +{
> >>>> + int i;
> >>>> +
> >>>> + if (!is_param) {
> >>>> + /*
> >>>> + * memcpy can do unaligned aligned access depending on source
> >>>> + * and dest address, which is incompatible with nand cache. Fallback
> >>>> + * to the memcpy for io version
> >>>> + */
> >>>> + if (bcmbca_nand_is_buf_aligned(flash_cache, buffer))
> >>>> + memcpy((void *)buffer, (void *)flash_cache, fc_words * 4);
> >>>> + else
> >>>> + memcpy_fromio((void *)buffer, (void *)flash_cache, fc_words * 4);
> >>>> + } else {
> >>>> + /* Flash cache has same endian as the host for parameter pages */
> >>>> + for (i = 0; i < fc_words; i++, buffer++)
> >>>> + *buffer = __raw_readl(flash_cache + i * 4);
> >>>> + }
> >>>> +}
> >>>> +
> >>>> static int bcmbca_nand_probe(struct platform_device *pdev)
> >>>> {
> >>>> struct device *dev = &pdev->dev;
> >>>> @@ -75,6 +110,7 @@ static int bcmbca_nand_probe(struct platform_device *pdev)
> >>>> >> soc->ctlrdy_ack = bcmbca_nand_intc_ack;
> >>>> soc->ctlrdy_set_enabled = bcmbca_nand_intc_set;
> >>>> + soc->read_data_bus = bcmbca_read_data_bus;
> >>>> >> return brcmnand_probe(pdev, soc);
> >>>> }
> >>>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> >>>> index d920e88c7f5b..656be4d73016 100644
> >>>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> >>>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> >>>> @@ -814,6 +814,30 @@ static inline u32 edu_readl(struct brcmnand_controller *ctrl,
> >>>> return brcmnand_readl(ctrl->edu_base + offs);
> >>>> }
> >>>> >> +static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl,
> >>>> + void __iomem *flash_cache, u32 *buffer,
> >>>> + int fc_words, bool is_param)
> >>>> +{
> >>>> + struct brcmnand_soc *soc = ctrl->soc;
> >>>> + int i;
> >>>> +
> >>>> + if (soc->read_data_bus) {
> >>>> + soc->read_data_bus(soc, flash_cache, buffer, fc_words, is_param);
> >>>> + } else {
> >>>> + if (!is_param) {
> >>>> + for (i = 0; i < fc_words; i++, buffer++)
> >>>> + *buffer = brcmnand_read_fc(ctrl, i);
> >>>> + } else {
> >>>> + for (i = 0; i < fc_words; i++)
> >>>> + /*
> >>>> + * Flash cache is big endian for parameter pages, at
> >>>> + * least on STB SoCs
> >>>> + */
> >>>> + buffer[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
> >>>> + }
> >>>> + }
> >>>
> >>> Perhaps we could have a single function that is statically assigned at
> >>> probe time instead of a first helper with two conditions which calls in
> >>> one case another hook... This can be simplified I guess.
> >>> >> Well this will need to be done at the SoC specific implementation level (bcm<xxx>_nand.c) and each SoC will need to have either general data bus read func with is_param option or data_bus_read_page, data_bus_read_param.
> >
> > You told me in case we would use exec_op we could avoid the param
> > cache. If that's true then the whole support can be simplified.
> >
> Correct we may possibly unified the parameter data read but exec_op is long shot and we are not fully ready for that yet. It also depends on if the low level data register has endianess difference for the parameter data between difference SoCs.
>
> So I would like to push the current implementation and we can explore the exec_op option late which will be a much big and complete different implementation.
I am sorry, but this series is totally backwards: you're trying to guess
what comes next with the 'is_param' thing, which is exactly what we have been
fighting against since 2017. There are plenty of ->exec_op()
conversions out there, and I don't believe this one will be harder. You
need to convert the driver to this new API and get rid of this whole
endianness nonsense, which will simplify the driver a lot.
>
> >> Not sure how much this can be simplified... Or we have default
> >> implementation in brcmnand.c but then there is one condition check
> >> too. Page read is done at 512 bytes burst. One or two conditions
> >> check outside of the per 512 bytes read loop does not sounds too bad
> >> if performance is concern.
> >
> > It is unreadable. That is my main concern.
> >
> >>
> >>>> +}
> >>>> +
> >>>> static void brcmnand_clear_ecc_addr(struct brcmnand_controller *ctrl)
> >>>> {
> >>>> >> @@ -1811,20 +1835,11 @@ static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
> >>>> native_cmd == CMD_PARAMETER_CHANGE_COL) {
> >>>> /* Copy flash cache word-wise */
> >>>> u32 *flash_cache = (u32 *)ctrl->flash_cache;
> >>>> - int i;
> >>>> >> brcmnand_soc_data_bus_prepare(ctrl->soc, true);
> >>>> >> - /*
> >>>> - * Must cache the FLASH_CACHE now, since changes in
> >>>> - * SECTOR_SIZE_1K may invalidate it
> >>>> - */
> >>>> - for (i = 0; i < FC_WORDS; i++)
> >>>> - /*
> >>>> - * Flash cache is big endian for parameter pages, at
> >>>> - * least on STB SoCs
> >>>> - */
> >>>> - flash_cache[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
> >>>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, flash_cache,
> >>>> + FC_WORDS, true);
> >>>> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, true);
> >>>> >> @@ -2137,7 +2152,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
> >>>> {
> >>>> struct brcmnand_host *host = nand_get_controller_data(chip);
> >>>> struct brcmnand_controller *ctrl = host->ctrl;
> >>>> - int i, j, ret = 0;
> >>>> + int i, ret = 0;
> >>>> >> brcmnand_clear_ecc_addr(ctrl);
> >>>> >> @@ -2150,8 +2165,9 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
> >>>> if (likely(buf)) {
> >>>> brcmnand_soc_data_bus_prepare(ctrl->soc, false);
> >>>> >> - for (j = 0; j < FC_WORDS; j++, buf++)
> >>>> - *buf = brcmnand_read_fc(ctrl, j);
> >>>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, buf,
> >>>> + FC_WORDS, false);
> >>>> + buf += FC_WORDS;
> >>>> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, false);
> >>>> }
> >>>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.h b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> >>>> index f1f93d85f50d..88819bc395f8 100644
> >>>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> >>>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> >>>> @@ -24,6 +24,8 @@ struct brcmnand_soc {
> >>>> void (*ctlrdy_set_enabled)(struct brcmnand_soc *soc, bool en);
> >>>> void (*prepare_data_bus)(struct brcmnand_soc *soc, bool prepare,
> >>>> bool is_param);
> >>>> + void (*read_data_bus)(struct brcmnand_soc *soc, void __iomem *flash_cache,
> >>>> + u32 *buffer, int fc_words, bool is_param);
> >>>> const struct brcmnand_io_ops *ops;
> >>>> };
> >>>> > >
> >>> Thanks,
> >>> Miquèl
> >>> > >
> > Thanks,
> > Miquèl
> >
Thanks,
Miquèl
Hi Miquel,
On 06/09/2023 01:35 AM, Miquel Raynal wrote:
> Hi William,
>
> [email protected] wrote on Thu, 8 Jun 2023 12:10:06 -0700:
>
>> On 06/07/2023 11:18 PM, Miquel Raynal wrote:
>>> Hi William,
>>>
>>> [email protected] wrote on Wed, 7 Jun 2023 13:24:23 -0700:
>>>
>>>> Hi Miquel,
>>>>
>>>> On 06/07/2023 01:22 AM, Miquel Raynal wrote:
>>>>> Hi William,
>>>>>
>>>>> [email protected] wrote on Tue, 6 Jun 2023 16:12:50 -0700:
>>>>> >>>> The BCMBCA broadband SoC integrates the NAND controller differently than
>>>>>> STB, iProc and other SoCs. It has different endianness for NAND cache
>>>>>> data and ONFI parameter data.
>>>>>>
>>>>>> Add a SoC read data bus shim for BCMBCA to meet the specific SoC need
>>>>>> and performance improvement using the optimized memcpy function on NAND
>>>>>> cache memory.
>>>>>>
>>>>>> Signed-off-by: William Zhang <[email protected]>
>>>>>> ---
>>>>>>
>>>>>> drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c | 36 +++++++++++++++++
>>>>>> drivers/mtd/nand/raw/brcmnand/brcmnand.c | 44 ++++++++++++++-------
>>>>>> drivers/mtd/nand/raw/brcmnand/brcmnand.h | 2 +
>>>>>> 3 files changed, 68 insertions(+), 14 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>>>>>> index 7e48b6a0bfa2..899103a62c98 100644
>>>>>> --- a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>>>>>> +++ b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>>>>>> @@ -26,6 +26,18 @@ enum {
>>>>>> BCMBCA_CTLRDY = BIT(4),
>>>>>> };
>>>>>> >> +#if defined(CONFIG_ARM64)
>>>>>> +#define ALIGN_REQ 8
>>>>>> +#else
>>>>>> +#define ALIGN_REQ 4
>>>>>> +#endif
>>>>>> +
>>>>>> +static inline bool bcmbca_nand_is_buf_aligned(void *flash_cache, void *buffer)
>>>>>> +{
>>>>>> + return IS_ALIGNED((uintptr_t)buffer, ALIGN_REQ) &&
>>>>>> + IS_ALIGNED((uintptr_t)flash_cache, ALIGN_REQ);
>>>>>> +}
>>>>>> +
>>>>>> static bool bcmbca_nand_intc_ack(struct brcmnand_soc *soc)
>>>>>> {
>>>>>> struct bcmbca_nand_soc *priv =
>>>>>> @@ -56,6 +68,29 @@ static void bcmbca_nand_intc_set(struct brcmnand_soc *soc, bool en)
>>>>>> brcmnand_writel(val, mmio);
>>>>>> }
>>>>>> >> +static void bcmbca_read_data_bus(struct brcmnand_soc *soc,
>>>>>> + void __iomem *flash_cache, u32 *buffer,
>>>>>> + int fc_words, bool is_param)
>>>>>> +{
>>>>>> + int i;
>>>>>> +
>>>>>> + if (!is_param) {
>>>>>> + /*
>>>>>> + * memcpy can do unaligned aligned access depending on source
>>>>>> + * and dest address, which is incompatible with nand cache. Fallback
>>>>>> + * to the memcpy for io version
>>>>>> + */
>>>>>> + if (bcmbca_nand_is_buf_aligned(flash_cache, buffer))
>>>>>> + memcpy((void *)buffer, (void *)flash_cache, fc_words * 4);
>>>>>> + else
>>>>>> + memcpy_fromio((void *)buffer, (void *)flash_cache, fc_words * 4);
>>>>>> + } else {
>>>>>> + /* Flash cache has same endian as the host for parameter pages */
>>>>>> + for (i = 0; i < fc_words; i++, buffer++)
>>>>>> + *buffer = __raw_readl(flash_cache + i * 4);
>>>>>> + }
>>>>>> +}
>>>>>> +
>>>>>> static int bcmbca_nand_probe(struct platform_device *pdev)
>>>>>> {
>>>>>> struct device *dev = &pdev->dev;
>>>>>> @@ -75,6 +110,7 @@ static int bcmbca_nand_probe(struct platform_device *pdev)
>>>>>> >> soc->ctlrdy_ack = bcmbca_nand_intc_ack;
>>>>>> soc->ctlrdy_set_enabled = bcmbca_nand_intc_set;
>>>>>> + soc->read_data_bus = bcmbca_read_data_bus;
>>>>>> >> return brcmnand_probe(pdev, soc);
>>>>>> }
>>>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>>>>>> index d920e88c7f5b..656be4d73016 100644
>>>>>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>>>>>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>>>>>> @@ -814,6 +814,30 @@ static inline u32 edu_readl(struct brcmnand_controller *ctrl,
>>>>>> return brcmnand_readl(ctrl->edu_base + offs);
>>>>>> }
>>>>>> >> +static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl,
>>>>>> + void __iomem *flash_cache, u32 *buffer,
>>>>>> + int fc_words, bool is_param)
>>>>>> +{
>>>>>> + struct brcmnand_soc *soc = ctrl->soc;
>>>>>> + int i;
>>>>>> +
>>>>>> + if (soc->read_data_bus) {
>>>>>> + soc->read_data_bus(soc, flash_cache, buffer, fc_words, is_param);
>>>>>> + } else {
>>>>>> + if (!is_param) {
>>>>>> + for (i = 0; i < fc_words; i++, buffer++)
>>>>>> + *buffer = brcmnand_read_fc(ctrl, i);
>>>>>> + } else {
>>>>>> + for (i = 0; i < fc_words; i++)
>>>>>> + /*
>>>>>> + * Flash cache is big endian for parameter pages, at
>>>>>> + * least on STB SoCs
>>>>>> + */
>>>>>> + buffer[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
>>>>>> + }
>>>>>> + }
>>>>>
>>>>> Perhaps we could have a single function that is statically assigned at
>>>>> probe time instead of a first helper with two conditions which calls in
>>>>> one case another hook... This can be simplified I guess.
>>>>> >> Well this will need to be done at the SoC specific implementation level (bcm<xxx>_nand.c) and each SoC will need to have either general data bus read func with is_param option or data_bus_read_page, data_bus_read_param.
>>>
>>> You told me in case we would use exec_op we could avoid the param
>>> cache. If that's true then the whole support can be simplified.
>>>
>> Correct we may possibly unified the parameter data read but exec_op is long shot and we are not fully ready for that yet. It also depends on if the low level data register has endianess difference for the parameter data between difference SoCs.
>>
>> So I would like to push the current implementation and we can explore the exec_op option late which will be a much big and complete different implementation.
>
> I am sorry but this series is totally backwards, you're trying to guess
> what comes next with the 'is_param' thing, it's exactly what we are
> fighting against since 2017. There are plenty of ->exec_op()
> conversions out there, I don't believe this one will be harder. You
> need to convert the driver to this new API and get rid of this whole
> endianness non-sense to simplify a lot the driver.
>
I am not guessing anything; I am just factoring out the existing common NAND
cache read logic into a single default function (or one for page read
and another for parameter read, as I mentioned in another thread) and
allowing the SoC to override the implementation when needed.
I agree ->exec_op can possibly get rid of the parameter page read
function and is the way to go. But it won't help with the endianness of the
page read. It's not that I am against exec_op, but I want to take one
step at a time: I'd like to get these fixes and the support for the bcmbca SoC
in first, and then work on the exec_op API, to minimize the change and reduce
the risk.
>>
>>>> Not sure how much this can be simplified... Or we have default
>>>> implementation in brcmnand.c but then there is one condition check
>>>> too. Page read is done at 512 bytes burst. One or two conditions
>>>> check outside of the per 512 bytes read loop does not sounds too bad
>>>> if performance is concern.
>>>
>>> It is unreadable. That is my main concern.
>>>
>>>>
>>>>>> +}
>>>>>> +
>>>>>> static void brcmnand_clear_ecc_addr(struct brcmnand_controller *ctrl)
>>>>>> {
>>>>>> >> @@ -1811,20 +1835,11 @@ static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
>>>>>> native_cmd == CMD_PARAMETER_CHANGE_COL) {
>>>>>> /* Copy flash cache word-wise */
>>>>>> u32 *flash_cache = (u32 *)ctrl->flash_cache;
>>>>>> - int i;
>>>>>> >> brcmnand_soc_data_bus_prepare(ctrl->soc, true);
>>>>>> >> - /*
>>>>>> - * Must cache the FLASH_CACHE now, since changes in
>>>>>> - * SECTOR_SIZE_1K may invalidate it
>>>>>> - */
>>>>>> - for (i = 0; i < FC_WORDS; i++)
>>>>>> - /*
>>>>>> - * Flash cache is big endian for parameter pages, at
>>>>>> - * least on STB SoCs
>>>>>> - */
>>>>>> - flash_cache[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
>>>>>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, flash_cache,
>>>>>> + FC_WORDS, true);
>>>>>> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, true);
>>>>>> >> @@ -2137,7 +2152,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
>>>>>> {
>>>>>> struct brcmnand_host *host = nand_get_controller_data(chip);
>>>>>> struct brcmnand_controller *ctrl = host->ctrl;
>>>>>> - int i, j, ret = 0;
>>>>>> + int i, ret = 0;
>>>>>> >> brcmnand_clear_ecc_addr(ctrl);
>>>>>> >> @@ -2150,8 +2165,9 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
>>>>>> if (likely(buf)) {
>>>>>> brcmnand_soc_data_bus_prepare(ctrl->soc, false);
>>>>>> >> - for (j = 0; j < FC_WORDS; j++, buf++)
>>>>>> - *buf = brcmnand_read_fc(ctrl, j);
>>>>>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, buf,
>>>>>> + FC_WORDS, false);
>>>>>> + buf += FC_WORDS;
>>>>>> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, false);
>>>>>> }
>>>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.h b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>>>>>> index f1f93d85f50d..88819bc395f8 100644
>>>>>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>>>>>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>>>>>> @@ -24,6 +24,8 @@ struct brcmnand_soc {
>>>>>> void (*ctlrdy_set_enabled)(struct brcmnand_soc *soc, bool en);
>>>>>> void (*prepare_data_bus)(struct brcmnand_soc *soc, bool prepare,
>>>>>> bool is_param);
>>>>>> + void (*read_data_bus)(struct brcmnand_soc *soc, void __iomem *flash_cache,
>>>>>> + u32 *buffer, int fc_words, bool is_param);
>>>>>> const struct brcmnand_io_ops *ops;
>>>>>> };
>>>>>> > >
>>>>> Thanks,
>>>>> Miquèl
>>>>> > >
>>> Thanks,
>>> Miquèl
>>>
>
>
> Thanks,
> Miquèl
>
Hi William,
kernel test robot noticed the following build warnings:
[auto build test WARNING on mtd/nand/next]
[also build test WARNING on mtd/mtd/next mtd/mtd/fixes linus/master v6.4-rc5 next-20230609]
[cannot apply to robh/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
When submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/William-Zhang/mtd-rawnand-brcmnand-Fix-ECC-level-field-setting-for-v7-2-controller/20230607-071834
base: https://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git nand/next
patch link: https://lore.kernel.org/r/20230606231252.94838-11-william.zhang%40broadcom.com
patch subject: [PATCH 10/12] mtd: rawnand: brcmnand: Add BCMBCA read data bus interface
config: arm-randconfig-s052-20230611 (https://download.01.org/0day-ci/archive/20230611/[email protected]/config)
compiler: arm-linux-gnueabi-gcc (GCC) 12.3.0
reproduce:
mkdir -p ~/bin
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.4-39-gce1a6720-dirty
# https://github.com/intel-lab-lkp/linux/commit/2fce7300f3e21ad0cb55442e1acfeaf60f41bf7d
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review William-Zhang/mtd-rawnand-brcmnand-Fix-ECC-level-field-setting-for-v7-2-controller/20230607-071834
git checkout 2fce7300f3e21ad0cb55442e1acfeaf60f41bf7d
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.3.0 ~/bin/make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=arm olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.3.0 ~/bin/make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=arm SHELL=/bin/bash drivers/mtd/nand/raw/brcmnand/
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
sparse warnings: (new ones prefixed by >>)
drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c:83:48: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *flash_cache @@ got void [noderef] __iomem *flash_cache @@
drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c:83:48: sparse: expected void *flash_cache
drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c:83:48: sparse: got void [noderef] __iomem *flash_cache
drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c:84:25: sparse: sparse: cast removes address space '__iomem' of expression
drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c:84:25: sparse: sparse: cast removes address space '__iomem' of expression
drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c:84:25: sparse: sparse: cast removes address space '__iomem' of expression
drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c:86:25: sparse: sparse: cast removes address space '__iomem' of expression
>> drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c:86:25: sparse: sparse: incorrect type in argument 2 (different address spaces) @@ expected void const volatile [noderef] __iomem *from @@ got void * @@
drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c:86:25: sparse: expected void const volatile [noderef] __iomem *from
drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c:86:25: sparse: got void *
vim +86 drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
70
71 static void bcmbca_read_data_bus(struct brcmnand_soc *soc,
72 void __iomem *flash_cache, u32 *buffer,
73 int fc_words, bool is_param)
74 {
75 int i;
76
77 if (!is_param) {
78 /*
79 * memcpy can do unaligned aligned access depending on source
80 * and dest address, which is incompatible with nand cache. Fallback
81 * to the memcpy for io version
82 */
> 83 if (bcmbca_nand_is_buf_aligned(flash_cache, buffer))
84 memcpy((void *)buffer, (void *)flash_cache, fc_words * 4);
85 else
> 86 memcpy_fromio((void *)buffer, (void *)flash_cache, fc_words * 4);
87 } else {
88 /* Flash cache has same endian as the host for parameter pages */
89 for (i = 0; i < fc_words; i++, buffer++)
90 *buffer = __raw_readl(flash_cache + i * 4);
91 }
92 }
93
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi William,
[email protected] wrote on Fri, 9 Jun 2023 12:16:27 -0700:
> Hi Miquel,
>
> On 06/09/2023 01:35 AM, Miquel Raynal wrote:
> > Hi William,
> >
> > [email protected] wrote on Thu, 8 Jun 2023 12:10:06 -0700:
> >
> >> On 06/07/2023 11:18 PM, Miquel Raynal wrote:
> >>> Hi William,
> >>>
> >>> [email protected] wrote on Wed, 7 Jun 2023 13:24:23 -0700:
> >>> >>>> Hi Miquel,
> >>>>
> >>>> On 06/07/2023 01:22 AM, Miquel Raynal wrote:
> >>>>> Hi William,
> >>>>>
> >>>>> [email protected] wrote on Tue, 6 Jun 2023 16:12:50 -0700:
> >>>>> >>>> The BCMBCA broadband SoC integrates the NAND controller differently than
> >>>>>> STB, iProc and other SoCs. It has different endianness for NAND cache
> >>>>>> data and ONFI parameter data.
> >>>>>>
> >>>>>> Add a SoC read data bus shim for BCMBCA to meet the specific SoC need
> >>>>>> and performance improvement using the optimized memcpy function on NAND
> >>>>>> cache memory.
> >>>>>>
> >>>>>> Signed-off-by: William Zhang <[email protected]>
> >>>>>> ---
> >>>>>>
> >>>>>> drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c | 36 +++++++++++++++++
> >>>>>> drivers/mtd/nand/raw/brcmnand/brcmnand.c | 44 ++++++++++++++-------
> >>>>>> drivers/mtd/nand/raw/brcmnand/brcmnand.h | 2 +
> >>>>>> 3 files changed, 68 insertions(+), 14 deletions(-)
> >>>>>>
> >>>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> >>>>>> index 7e48b6a0bfa2..899103a62c98 100644
> >>>>>> --- a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> >>>>>> +++ b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
> >>>>>> @@ -26,6 +26,18 @@ enum {
> >>>>>> BCMBCA_CTLRDY = BIT(4),
> >>>>>> };
> >>>>>> >> +#if defined(CONFIG_ARM64)
> >>>>>> +#define ALIGN_REQ 8
> >>>>>> +#else
> >>>>>> +#define ALIGN_REQ 4
> >>>>>> +#endif
> >>>>>> +
> >>>>>> +static inline bool bcmbca_nand_is_buf_aligned(void *flash_cache, void *buffer)
> >>>>>> +{
> >>>>>> + return IS_ALIGNED((uintptr_t)buffer, ALIGN_REQ) &&
> >>>>>> + IS_ALIGNED((uintptr_t)flash_cache, ALIGN_REQ);
> >>>>>> +}
> >>>>>> +
> >>>>>> static bool bcmbca_nand_intc_ack(struct brcmnand_soc *soc)
> >>>>>> {
> >>>>>> struct bcmbca_nand_soc *priv =
> >>>>>> @@ -56,6 +68,29 @@ static void bcmbca_nand_intc_set(struct brcmnand_soc *soc, bool en)
> >>>>>> brcmnand_writel(val, mmio);
> >>>>>> }
> >>>>>> >> +static void bcmbca_read_data_bus(struct brcmnand_soc *soc,
> >>>>>> + void __iomem *flash_cache, u32 *buffer,
> >>>>>> + int fc_words, bool is_param)
> >>>>>> +{
> >>>>>> + int i;
> >>>>>> +
> >>>>>> + if (!is_param) {
> >>>>>> + /*
> >>>>>> + * memcpy can do unaligned aligned access depending on source
> >>>>>> + * and dest address, which is incompatible with nand cache. Fallback
> >>>>>> + * to the memcpy for io version
> >>>>>> + */
> >>>>>> + if (bcmbca_nand_is_buf_aligned(flash_cache, buffer))
> >>>>>> + memcpy((void *)buffer, (void *)flash_cache, fc_words * 4);
> >>>>>> + else
> >>>>>> + memcpy_fromio((void *)buffer, (void *)flash_cache, fc_words * 4);
> >>>>>> + } else {
> >>>>>> + /* Flash cache has same endian as the host for parameter pages */
> >>>>>> + for (i = 0; i < fc_words; i++, buffer++)
> >>>>>> + *buffer = __raw_readl(flash_cache + i * 4);
> >>>>>> + }
> >>>>>> +}
> >>>>>> +
> >>>>>> static int bcmbca_nand_probe(struct platform_device *pdev)
> >>>>>> {
> >>>>>> struct device *dev = &pdev->dev;
> >>>>>> @@ -75,6 +110,7 @@ static int bcmbca_nand_probe(struct platform_device *pdev)
> >>>>>> >> soc->ctlrdy_ack = bcmbca_nand_intc_ack;
> >>>>>> soc->ctlrdy_set_enabled = bcmbca_nand_intc_set;
> >>>>>> + soc->read_data_bus = bcmbca_read_data_bus;
> >>>>>> >> return brcmnand_probe(pdev, soc);
> >>>>>> }
> >>>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> >>>>>> index d920e88c7f5b..656be4d73016 100644
> >>>>>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> >>>>>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> >>>>>> @@ -814,6 +814,30 @@ static inline u32 edu_readl(struct brcmnand_controller *ctrl,
> >>>>>> return brcmnand_readl(ctrl->edu_base + offs);
> >>>>>> }
> >>>>>> >> +static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl,
> >>>>>> + void __iomem *flash_cache, u32 *buffer,
> >>>>>> + int fc_words, bool is_param)
> >>>>>> +{
> >>>>>> + struct brcmnand_soc *soc = ctrl->soc;
> >>>>>> + int i;
> >>>>>> +
> >>>>>> + if (soc->read_data_bus) {
> >>>>>> + soc->read_data_bus(soc, flash_cache, buffer, fc_words, is_param);
> >>>>>> + } else {
> >>>>>> + if (!is_param) {
> >>>>>> + for (i = 0; i < fc_words; i++, buffer++)
> >>>>>> + *buffer = brcmnand_read_fc(ctrl, i);
> >>>>>> + } else {
> >>>>>> + for (i = 0; i < fc_words; i++)
> >>>>>> + /*
> >>>>>> + * Flash cache is big endian for parameter pages, at
> >>>>>> + * least on STB SoCs
> >>>>>> + */
> >>>>>> + buffer[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
> >>>>>> + }
> >>>>>> + }
> >>>>>
> >>>>> Perhaps we could have a single function that is statically assigned at
> >>>>> probe time instead of a first helper with two conditions which calls in
> >>>>> one case another hook... This can be simplified I guess.
> >>>>> >> Well this will need to be done at the SoC specific implementation level (bcm<xxx>_nand.c) and each SoC will need to have either general data bus read func with is_param option or data_bus_read_page, data_bus_read_param.
> >>>
> >>> You told me in case we would use exec_op we could avoid the param
> >>> cache. If that's true then the whole support can be simplified.
> >>> >> Correct we may possibly unified the parameter data read but exec_op is long shot and we are not fully ready for that yet. It also depends on if the low level data register has endianess difference for the parameter data between difference SoCs.
> >>
> >> So I would like to push the current implementation and we can explore the exec_op option late which will be a much big and complete different implementation.
> >
> > I am sorry but this series is totally backwards, you're trying to guess
> > what comes next with the 'is_param' thing, it's exactly what we are
> > fighting against since 2017. There are plenty of ->exec_op()
> > conversions out there, I don't believe this one will be harder. You
> > need to convert the driver to this new API and get rid of this whole
> > endianness non-sense to simplify a lot the driver.
> >
> I am not guessing anything but just factor out the existing common nand cache read logic into the single default function(or one for page read and another for parameter read as I mentioned in another thread) and allow SoC to overrides the implementation when needed.
No, you are trying to guess what type of read the core is performing,
either a regular data page read or a parameter page read.
> I agree ->exec_op can possibly get rid of the parameter page read function and is the way to go. But it won't help on the page read for endianess.
You told me there is no endianness issue with the data pages, so why
won't it help on the page read?
> It's not that I am against exec_op but I want to take one step a time
> and I'd like to get these fixes
I don't see any fix here? Let me know if I am missing something, but
right now I see a new version of the controller being supported with
its own constraints. If you are fixing existing code for an already
supported platform, then make it clear and we can discuss this. But if
you just want to support the bcmbca flavor, then there is no risk
mitigation involved here, and a conversion is the right step :)
> and support for bcmbca soc first and
> then work on the exec_op API to minimize the change and reduce the
> risk.
>
> >>
> >>>> Not sure how much this can be simplified... Or we have default
> >>>> implementation in brcmnand.c but then there is one condition check
> >>>> too. Page read is done at 512 bytes burst. One or two conditions
> >>>> check outside of the per 512 bytes read loop does not sounds too bad
> >>>> if performance is concern.
> >>>
> >>> It is unreadable. That is my main concern.
> >>> >>>> >>>>>> +}
> >>>>>> +
> >>>>>> static void brcmnand_clear_ecc_addr(struct brcmnand_controller *ctrl)
> >>>>>> {
> >>>>>> >> @@ -1811,20 +1835,11 @@ static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
> >>>>>> native_cmd == CMD_PARAMETER_CHANGE_COL) {
> >>>>>> /* Copy flash cache word-wise */
> >>>>>> u32 *flash_cache = (u32 *)ctrl->flash_cache;
> >>>>>> - int i;
> >>>>>> >> brcmnand_soc_data_bus_prepare(ctrl->soc, true);
> >>>>>> >> - /*
> >>>>>> - * Must cache the FLASH_CACHE now, since changes in
> >>>>>> - * SECTOR_SIZE_1K may invalidate it
> >>>>>> - */
> >>>>>> - for (i = 0; i < FC_WORDS; i++)
> >>>>>> - /*
> >>>>>> - * Flash cache is big endian for parameter pages, at
> >>>>>> - * least on STB SoCs
> >>>>>> - */
> >>>>>> - flash_cache[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
> >>>>>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, flash_cache,
> >>>>>> + FC_WORDS, true);
> >>>>>> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, true);
> >>>>>> >> @@ -2137,7 +2152,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
> >>>>>> {
> >>>>>> struct brcmnand_host *host = nand_get_controller_data(chip);
> >>>>>> struct brcmnand_controller *ctrl = host->ctrl;
> >>>>>> - int i, j, ret = 0;
> >>>>>> + int i, ret = 0;
> >>>>>> >> brcmnand_clear_ecc_addr(ctrl);
> >>>>>> >> @@ -2150,8 +2165,9 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
> >>>>>> if (likely(buf)) {
> >>>>>> brcmnand_soc_data_bus_prepare(ctrl->soc, false);
> >>>>>> >> - for (j = 0; j < FC_WORDS; j++, buf++)
> >>>>>> - *buf = brcmnand_read_fc(ctrl, j);
> >>>>>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, buf,
> >>>>>> + FC_WORDS, false);
> >>>>>> + buf += FC_WORDS;
> >>>>>> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, false);
> >>>>>> }
> >>>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.h b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> >>>>>> index f1f93d85f50d..88819bc395f8 100644
> >>>>>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> >>>>>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
> >>>>>> @@ -24,6 +24,8 @@ struct brcmnand_soc {
> >>>>>> void (*ctlrdy_set_enabled)(struct brcmnand_soc *soc, bool en);
> >>>>>> void (*prepare_data_bus)(struct brcmnand_soc *soc, bool prepare,
> >>>>>> bool is_param);
> >>>>>> + void (*read_data_bus)(struct brcmnand_soc *soc, void __iomem *flash_cache,
> >>>>>> + u32 *buffer, int fc_words, bool is_param);
> >>>>>> const struct brcmnand_io_ops *ops;
> >>>>>> };
> >>>>>> > >
> >>>>> Thanks,
> >>>>> Miquèl
> >>>>> > >
> >>> Thanks,
> >>> Miquèl
> >>> > >
> > Thanks,
> > Miquèl
> >
Thanks,
Miquèl
Hello again,
> > >>>>> Perhaps we could have a single function that is statically assigned at
> > >>>>> probe time instead of a first helper with two conditions which calls in
> > >>>>> one case another hook... This can be simplified I guess.
> > >>>>> >> Well this will need to be done at the SoC specific implementation level (bcm<xxx>_nand.c) and each SoC will need to have either general data bus read func with is_param option or data_bus_read_page, data_bus_read_param.
> > >>>
> > >>> You told me in case we would use exec_op we could avoid the param
> > >>> cache. If that's true then the whole support can be simplified.
> > >>> >> Correct we may possibly unified the parameter data read but exec_op is long shot and we are not fully ready for that yet. It also depends on if the low level data register has endianess difference for the parameter data between difference SoCs.
> > >>
> > >> So I would like to push the current implementation and we can explore the exec_op option late which will be a much big and complete different implementation.
> > >
> > > I am sorry but this series is totally backwards, you're trying to guess
> > > what comes next with the 'is_param' thing, it's exactly what we are
> > > fighting against since 2017. There are plenty of ->exec_op()
> > > conversions out there, I don't believe this one will be harder. You
> > > need to convert the driver to this new API and get rid of this whole
> > > endianness non-sense to simplify a lot the driver.
> > >
> > I am not guessing anything but just factor out the existing common nand cache read logic into the single default function(or one for page read and another for parameter read as I mentioned in another thread) and allow SoC to overrides the implementation when needed.
>
> No, you are trying to guess what type of read the core is performing,
> either a regular data page read or a parameter page read.
>
> > I agree ->exec_op can possibly get rid of the parameter page read function and is the way to go. But it won't help on the page read for endianess.
>
> You told me there is no endianess issue with the data pages, so why it
> won't help on the page read?
>
> > It's not that I am against exec_op but I want to take one step a time
> > and I'd like to get these fixes
>
> I don't see any fix here? Let me know if I am missing something but
> right now I see a new version of the controller being supported with
> its own constraints. If you are fixing existing code for already
> supported platform, then make it clear and we can discuss this. But if
> you just want to support the bcmbca flavor, then there is no risk
> mitigation involved here, and a conversion is the right step :)
>
I forgot to mention: the exec_op conversion is almost ready, Boris
worked on it but he lacked the hardware so maybe you'll just need to
revive the few patches which target your platform and do a little bit of
debugging?
https://github.com/bbrezillon/linux/commits/nand/exec-op-conversion?after=8a3cf6fd25d5e15c6667f9e95c1fc86e4cb735e6+34&branch=nand%2Fexec-op-conversion&qualified_name=refs%2Fheads%2Fnand%2Fexec-op-conversion
Cheers,
Miquèl
On 06/12/2023 10:49 AM, Miquel Raynal wrote:
> Hi William,
>
> [email protected] wrote on Fri, 9 Jun 2023 12:16:27 -0700:
>
>> Hi Miquel,
>>
>> On 06/09/2023 01:35 AM, Miquel Raynal wrote:
>>> Hi William,
>>>
>>> [email protected] wrote on Thu, 8 Jun 2023 12:10:06 -0700:
>>>
>>>> On 06/07/2023 11:18 PM, Miquel Raynal wrote:
>>>>> Hi William,
>>>>>
>>>>> [email protected] wrote on Wed, 7 Jun 2023 13:24:23 -0700:
>>>>> >>>> Hi Miquel,
>>>>>>
>>>>>> On 06/07/2023 01:22 AM, Miquel Raynal wrote:
>>>>>>> Hi William,
>>>>>>>
>>>>>>> [email protected] wrote on Tue, 6 Jun 2023 16:12:50 -0700:
>>>>>>> >>>> The BCMBCA broadband SoC integrates the NAND controller differently than
>>>>>>>> STB, iProc and other SoCs. It has different endianness for NAND cache
>>>>>>>> data and ONFI parameter data.
>>>>>>>>
>>>>>>>> Add a SoC read data bus shim for BCMBCA to meet the specific SoC need
>>>>>>>> and performance improvement using the optimized memcpy function on NAND
>>>>>>>> cache memory.
>>>>>>>>
>>>>>>>> Signed-off-by: William Zhang <[email protected]>
>>>>>>>> ---
>>>>>>>>
>>>>>>>> drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c | 36 +++++++++++++++++
>>>>>>>> drivers/mtd/nand/raw/brcmnand/brcmnand.c | 44 ++++++++++++++-------
>>>>>>>> drivers/mtd/nand/raw/brcmnand/brcmnand.h | 2 +
>>>>>>>> 3 files changed, 68 insertions(+), 14 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>>>>>>>> index 7e48b6a0bfa2..899103a62c98 100644
>>>>>>>> --- a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>>>>>>>> +++ b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
>>>>>>>> @@ -26,6 +26,18 @@ enum {
>>>>>>>> BCMBCA_CTLRDY = BIT(4),
>>>>>>>> };
>>>>>>>> >> +#if defined(CONFIG_ARM64)
>>>>>>>> +#define ALIGN_REQ 8
>>>>>>>> +#else
>>>>>>>> +#define ALIGN_REQ 4
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>> +static inline bool bcmbca_nand_is_buf_aligned(void *flash_cache, void *buffer)
>>>>>>>> +{
>>>>>>>> + return IS_ALIGNED((uintptr_t)buffer, ALIGN_REQ) &&
>>>>>>>> + IS_ALIGNED((uintptr_t)flash_cache, ALIGN_REQ);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> static bool bcmbca_nand_intc_ack(struct brcmnand_soc *soc)
>>>>>>>> {
>>>>>>>> struct bcmbca_nand_soc *priv =
>>>>>>>> @@ -56,6 +68,29 @@ static void bcmbca_nand_intc_set(struct brcmnand_soc *soc, bool en)
>>>>>>>> brcmnand_writel(val, mmio);
>>>>>>>> }
>>>>>>>> >> +static void bcmbca_read_data_bus(struct brcmnand_soc *soc,
>>>>>>>> + void __iomem *flash_cache, u32 *buffer,
>>>>>>>> + int fc_words, bool is_param)
>>>>>>>> +{
>>>>>>>> + int i;
>>>>>>>> +
>>>>>>>> + if (!is_param) {
>>>>>>>> + /*
>>>>>>>> + * memcpy can do unaligned aligned access depending on source
>>>>>>>> + * and dest address, which is incompatible with nand cache. Fallback
>>>>>>>> + * to the memcpy for io version
>>>>>>>> + */
>>>>>>>> + if (bcmbca_nand_is_buf_aligned(flash_cache, buffer))
>>>>>>>> + memcpy((void *)buffer, (void *)flash_cache, fc_words * 4);
>>>>>>>> + else
>>>>>>>> + memcpy_fromio((void *)buffer, (void *)flash_cache, fc_words * 4);
>>>>>>>> + } else {
>>>>>>>> + /* Flash cache has same endian as the host for parameter pages */
>>>>>>>> + for (i = 0; i < fc_words; i++, buffer++)
>>>>>>>> + *buffer = __raw_readl(flash_cache + i * 4);
>>>>>>>> + }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> static int bcmbca_nand_probe(struct platform_device *pdev)
>>>>>>>> {
>>>>>>>> struct device *dev = &pdev->dev;
>>>>>>>> @@ -75,6 +110,7 @@ static int bcmbca_nand_probe(struct platform_device *pdev)
>>>>>>>> >> soc->ctlrdy_ack = bcmbca_nand_intc_ack;
>>>>>>>> soc->ctlrdy_set_enabled = bcmbca_nand_intc_set;
>>>>>>>> + soc->read_data_bus = bcmbca_read_data_bus;
>>>>>>>> >> return brcmnand_probe(pdev, soc);
>>>>>>>> }
>>>>>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>>>>>>>> index d920e88c7f5b..656be4d73016 100644
>>>>>>>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>>>>>>>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
>>>>>>>> @@ -814,6 +814,30 @@ static inline u32 edu_readl(struct brcmnand_controller *ctrl,
>>>>>>>> return brcmnand_readl(ctrl->edu_base + offs);
>>>>>>>> }
>>>>>>>> >> +static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl,
>>>>>>>> + void __iomem *flash_cache, u32 *buffer,
>>>>>>>> + int fc_words, bool is_param)
>>>>>>>> +{
>>>>>>>> + struct brcmnand_soc *soc = ctrl->soc;
>>>>>>>> + int i;
>>>>>>>> +
>>>>>>>> + if (soc->read_data_bus) {
>>>>>>>> + soc->read_data_bus(soc, flash_cache, buffer, fc_words, is_param);
>>>>>>>> + } else {
>>>>>>>> + if (!is_param) {
>>>>>>>> + for (i = 0; i < fc_words; i++, buffer++)
>>>>>>>> + *buffer = brcmnand_read_fc(ctrl, i);
>>>>>>>> + } else {
>>>>>>>> + for (i = 0; i < fc_words; i++)
>>>>>>>> + /*
>>>>>>>> + * Flash cache is big endian for parameter pages, at
>>>>>>>> + * least on STB SoCs
>>>>>>>> + */
>>>>>>>> + buffer[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
>>>>>>>> + }
>>>>>>>> + }
>>>>>>>
>>>>>>> Perhaps we could have a single function that is statically assigned at
>>>>>>> probe time instead of a first helper with two conditions which calls in
>>>>>>> one case another hook... This can be simplified I guess.
>>>>>>> >> Well this will need to be done at the SoC specific implementation level (bcm<xxx>_nand.c) and each SoC will need to have either general data bus read func with is_param option or data_bus_read_page, data_bus_read_param.
>>>>>
>>>>> You told me in case we would use exec_op we could avoid the param
>>>>> cache. If that's true then the whole support can be simplified.
>>>>> >> Correct we may possibly unified the parameter data read but exec_op is long shot and we are not fully ready for that yet. It also depends on if the low level data register has endianess difference for the parameter data between difference SoCs.
>>>>
>>>> So I would like to push the current implementation and we can explore the exec_op option late which will be a much big and complete different implementation.
>>>
>>> I am sorry but this series is totally backwards, you're trying to guess
>>> what comes next with the 'is_param' thing, it's exactly what we are
>>> fighting against since 2017. There are plenty of ->exec_op()
>>> conversions out there, I don't believe this one will be harder. You
>>> need to convert the driver to this new API and get rid of this whole
>>> endianness non-sense to simplify a lot the driver.
>>>
>> I am not guessing anything but just factor out the existing common nand cache read logic into the single default function(or one for page read and another for parameter read as I mentioned in another thread) and allow SoC to overrides the implementation when needed.
>
> No, you are trying to guess what type of read the core is performing,
> either a regular data page read or a parameter page read.
>
Okay this is what you mean by guessing. I didn't realize that ;)
>> I agree ->exec_op can possibly get rid of the parameter page read function and is the way to go. But it won't help on the page read for endianess.
>
> You told me there is no endianess issue with the data pages, so why it
> won't help on the page read?
>
Even with exec_op, the page read path for brcmnand (chip->ecc.read_page)
will still need the brcmnand_read_page function, for which I eventually
need a per-SoC implementation, at least for bcmbca for now, in addition to
the different endianness between SoCs. For bcmbca, I also use memcpy in
the patch, as the NAND cache in the bcmbca chip can handle the optimized
copy code as long as the buffer is aligned, for better performance.
>> It's not that I am against exec_op but I want to take one step a time
>> and I'd like to get these fixes
>
> I don't see any fix here? Let me know if I am missing something but
> right now I see a new version of the controller being supported with
> its own constraints. If you are fixing existing code for already
> supported platform, then make it clear and we can discuss this. But if
> you just want to support the bcmbca flavor, then there is no risk
> mitigation involved here, and a conversion is the right step :)
>
I mean patches 1 to 4 in this series.
The exec_op conversion will apply to all five SoCs under the brcmnand
folder, not just bcmbca. It will take a lot of time just to find people to
test/debug all of them, as I don't have access to the other SoCs and boards,
on top of the nature of this big change.
>> and support for bcmbca soc first and
>> then work on the exec_op API to minimize the change and reduce the
>> risk.
>>
>>>>
>>>>>> Not sure how much this can be simplified... Or we have default
>>>>>> implementation in brcmnand.c but then there is one condition check
>>>>>> too. Page read is done at 512 bytes burst. One or two conditions
>>>>>> check outside of the per 512 bytes read loop does not sounds too bad
>>>>>> if performance is concern.
>>>>>
>>>>> It is unreadable. That is my main concern.
>>>>> >>>> >>>>>> +}
>>>>>>>> +
>>>>>>>> static void brcmnand_clear_ecc_addr(struct brcmnand_controller *ctrl)
>>>>>>>> {
>>>>>>>> >> @@ -1811,20 +1835,11 @@ static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
>>>>>>>> native_cmd == CMD_PARAMETER_CHANGE_COL) {
>>>>>>>> /* Copy flash cache word-wise */
>>>>>>>> u32 *flash_cache = (u32 *)ctrl->flash_cache;
>>>>>>>> - int i;
>>>>>>>> >> brcmnand_soc_data_bus_prepare(ctrl->soc, true);
>>>>>>>> >> - /*
>>>>>>>> - * Must cache the FLASH_CACHE now, since changes in
>>>>>>>> - * SECTOR_SIZE_1K may invalidate it
>>>>>>>> - */
>>>>>>>> - for (i = 0; i < FC_WORDS; i++)
>>>>>>>> - /*
>>>>>>>> - * Flash cache is big endian for parameter pages, at
>>>>>>>> - * least on STB SoCs
>>>>>>>> - */
>>>>>>>> - flash_cache[i] = be32_to_cpu(brcmnand_read_fc(ctrl, i));
>>>>>>>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, flash_cache,
>>>>>>>> + FC_WORDS, true);
>>>>>>>> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, true);
>>>>>>>> >> @@ -2137,7 +2152,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
>>>>>>>> {
>>>>>>>> struct brcmnand_host *host = nand_get_controller_data(chip);
>>>>>>>> struct brcmnand_controller *ctrl = host->ctrl;
>>>>>>>> - int i, j, ret = 0;
>>>>>>>> + int i, ret = 0;
>>>>>>>> >> brcmnand_clear_ecc_addr(ctrl);
>>>>>>>> >> @@ -2150,8 +2165,9 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
>>>>>>>> if (likely(buf)) {
>>>>>>>> brcmnand_soc_data_bus_prepare(ctrl->soc, false);
>>>>>>>> >> - for (j = 0; j < FC_WORDS; j++, buf++)
>>>>>>>> - *buf = brcmnand_read_fc(ctrl, j);
>>>>>>>> + brcmnand_read_data_bus(ctrl, ctrl->nand_fc, buf,
>>>>>>>> + FC_WORDS, false);
>>>>>>>> + buf += FC_WORDS;
>>>>>>>> >> brcmnand_soc_data_bus_unprepare(ctrl->soc, false);
>>>>>>>> }
>>>>>>>> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.h b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>>>>>>>> index f1f93d85f50d..88819bc395f8 100644
>>>>>>>> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>>>>>>>> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.h
>>>>>>>> @@ -24,6 +24,8 @@ struct brcmnand_soc {
>>>>>>>> void (*ctlrdy_set_enabled)(struct brcmnand_soc *soc, bool en);
>>>>>>>> void (*prepare_data_bus)(struct brcmnand_soc *soc, bool prepare,
>>>>>>>> bool is_param);
>>>>>>>> + void (*read_data_bus)(struct brcmnand_soc *soc, void __iomem *flash_cache,
>>>>>>>> + u32 *buffer, int fc_words, bool is_param);
>>>>>>>> const struct brcmnand_io_ops *ops;
>>>>>>>> };
>>>>>>>> > >
>>>>>>> Thanks,
>>>>>>> Miquèl
>>>>>>> > >
>>>>> Thanks,
>>>>> Miquèl
>>>>> > >
>>> Thanks,
>>> Miquèl
>>>
>
>
> Thanks,
> Miquèl
>
On 06/12/2023 10:53 AM, Miquel Raynal wrote:
> Hello again,
>
>>>>>>>> Perhaps we could have a single function that is statically assigned at
>>>>>>>> probe time instead of a first helper with two conditions which calls in
>>>>>>>> one case another hook... This can be simplified I guess.
>>>>>>>> >> Well this will need to be done at the SoC specific implementation level (bcm<xxx>_nand.c) and each SoC will need to have either general data bus read func with is_param option or data_bus_read_page, data_bus_read_param.
>>>>>>
>>>>>> You told me in case we would use exec_op we could avoid the param
>>>>>> cache. If that's true then the whole support can be simplified.
>>>>>> >> Correct we may possibly unified the parameter data read but exec_op is long shot and we are not fully ready for that yet. It also depends on if the low level data register has endianess difference for the parameter data between difference SoCs.
>>>>>
>>>>> So I would like to push the current implementation and we can explore the exec_op option late which will be a much big and complete different implementation.
>>>>
>>>> I am sorry but this series is totally backwards, you're trying to guess
>>>> what comes next with the 'is_param' thing, it's exactly what we are
>>>> fighting against since 2017. There are plenty of ->exec_op()
>>>> conversions out there, I don't believe this one will be harder. You
>>>> need to convert the driver to this new API and get rid of this whole
>>>> endianness non-sense to simplify a lot the driver.
>>>>
>>> I am not guessing anything but just factor out the existing common nand cache read logic into the single default function(or one for page read and another for parameter read as I mentioned in another thread) and allow SoC to overrides the implementation when needed.
>>
>> No, you are trying to guess what type of read the core is performing,
>> either a regular data page read or a parameter page read.
>>
>>> I agree ->exec_op can possibly get rid of the parameter page read function and is the way to go. But it won't help on the page read for endianess.
>>
>> You told me there is no endianess issue with the data pages, so why it
>> won't help on the page read?
>>
>>> It's not that I am against exec_op but I want to take one step a time
>>> and I'd like to get these fixes
>>
>> I don't see any fix here? Let me know if I am missing something but
>> right now I see a new version of the controller being supported with
>> its own constraints. If you are fixing existing code for already
>> supported platform, then make it clear and we can discuss this. But if
>> you just want to support the bcmbca flavor, then there is no risk
>> mitigation involved here, and a conversion is the right step :)
>>
>
> I forgot to mention: the exec_op conversion is almost ready, Boris
> worked on it but he lacked the hardware so maybe you'll just need to
> revive the few patches which target your platform and do a little bit of
> debugging?
>
> https://github.com/bbrezillon/linux/commits/nand/exec-op-conversion?after=8a3cf6fd25d5e15c6667f9e95c1fc86e4cb735e6+34&branch=nand%2Fexec-op-conversion&qualified_name=refs%2Fheads%2Fnand%2Fexec-op-conversion
>
Yes, this is the patch that our exec_op work is based on. Thanks Boris!
The issue with the patch is that performance is very slow for anything that
relies on nand_read_page_op, as the patch implements it using the low
level cmd and data register to transfer the data byte by byte. I
actually sent out an email regarding this to Boris and he cc'ed you in
September last year. We have to use the nand parser to match the page read
from exec_op so we can actually match and use the brcmnand_page_read fast
path. But there are many situations that we need to match, so the project
to migrate to exec_op is still a work in progress, just on our bcmbca chip
as of now. I just forwarded that email to you again and I would appreciate
it if you have any input there. So IMHO it is just too risky and too big of
a scope to have exec_op added to this patch series, and it is definitely
better to do it afterwards with a dedicated patch.
> Cheers,
> Miquèl
>
Hi William,
[email protected] wrote on Mon, 12 Jun 2023 12:18:58 -0700:
> On 06/12/2023 10:53 AM, Miquel Raynal wrote:
> > Hello again,
> >
> >>>>>>>> Perhaps we could have a single function that is statically assigned at
> >>>>>>>> probe time instead of a first helper with two conditions which calls in
> >>>>>>>> one case another hook... This can be simplified I guess.
> >>>>>>>> >> Well this will need to be done at the SoC specific implementation level (bcm<xxx>_nand.c) and each SoC will need to have either general data bus read func with is_param option or data_bus_read_page, data_bus_read_param.
> >>>>>>
> >>>>>> You told me in case we would use exec_op we could avoid the param
> >>>>>> cache. If that's true then the whole support can be simplified.
> >>>>>> >> Correct we may possibly unified the parameter data read but exec_op is long shot and we are not fully ready for that yet. It also depends on if the low level data register has endianess difference for the parameter data between difference SoCs.
> >>>>>
> >>>>> So I would like to push the current implementation and we can explore the exec_op option late which will be a much big and complete different implementation.
> >>>>
> >>>> I am sorry but this series is totally backwards, you're trying to guess
> >>>> what comes next with the 'is_param' thing, it's exactly what we are
> >>>> fighting against since 2017. There are plenty of ->exec_op()
> >>>> conversions out there, I don't believe this one will be harder. You
> >>>> need to convert the driver to this new API and get rid of this whole
> >>>> endianness non-sense to simplify a lot the driver.
> >>>> >>> I am not guessing anything but just factor out the existing common nand cache read logic into the single default function(or one for page read and another for parameter read as I mentioned in another thread) and allow SoC to overrides the implementation when needed.
> >>
> >> No, you are trying to guess what type of read the core is performing,
> >> either a regular data page read or a parameter page read.
> >>
> >>> I agree ->exec_op can possibly get rid of the parameter page read function and is the way to go. But it won't help on the page read for endianess.
> >>
> >> You told me there is no endianess issue with the data pages, so why it
> >> won't help on the page read?
> >>
> >>> It's not that I am against exec_op but I want to take one step a time
> >>> and I'd like to get these fixes
> >>
> >> I don't see any fix here? Let me know if I am missing something but
> >> right now I see a new version of the controller being supported with
> >> its own constraints. If you are fixing existing code for already
> >> supported platform, then make it clear and we can discuss this. But if
> >> you just want to support the bcmbca flavor, then there is no risk
> >> mitigation involved here, and a conversion is the right step :)
> >>
> >
> > I forgot to mention: the exec_op conversion is almost ready, Boris
> > worked on it but he lacked the hardware so maybe you'll just need to
> > revive the few patches which target your platform and do a little bit of
> > debugging?
> >
> > https://github.com/bbrezillon/linux/commits/nand/exec-op-conversion?after=8a3cf6fd25d5e15c6667f9e95c1fc86e4cb735e6+34&branch=nand%2Fexec-op-conversion&qualified_name=refs%2Fheads%2Fnand%2Fexec-op-conversion
> >
> Yes this is the patch what our exec_op work is based on. Thanks Boris! The issue with patch is that performance is very slow for anything that rely on nand_read_page_op as the patch implementing it using the low level cmd and data register to transfer the data byte by byte.
You don't need to use exec_op for your read_page/write_page hooks,
quite the opposite actually. exec_op is not meant for high throughput.
exec_op is meant to be simple. You can have fast I/Os with a different
mechanism in your read/write_page hooks.
> I actually sent out an email regarding this to Boris and he cc'ed you in
> Sept last year. We have to use the nand parser to match the page read
> from exec_op so we can actually match and use the brcmnand_page_read
> fast path. But there are many situations that we need to match so the
> project to migrate to exec_op is still a work in progress, just on our
> bcmbca chip as of now. I just forwarded that email again to you and I
> would appreciate it if you have any input there. So IMHO it is just too
> risky and too big of a scope to have the exec_op added to this patch
> series and definitely better to do it afterwards with a dedicated
> patch.
As long as you add small and orthogonal changes to cmd_ctrl/cmd_func
I don't mind, but what you want now is to force me to pull dirty
changes "first", the type of change we have been refusing since 2018,
making me expect you'll perform the conversion afterwards. It would have
been far less dirty, and you would have all your code already upstreamed,
if you had performed the exec_op conversion back in September.
Thanks,
Miquèl
Hi Miquel,
On 06/12/2023 11:42 PM, Miquel Raynal wrote:
> Hi William,
>
> [email protected] wrote on Mon, 12 Jun 2023 12:18:58 -0700:
>
>> On 06/12/2023 10:53 AM, Miquel Raynal wrote:
>>> Hello again,
>>>
>>>>>>>>>> Perhaps we could have a single function that is statically assigned at
>>>>>>>>>> probe time instead of a first helper with two conditions which calls in
>>>>>>>>>> one case another hook... This can be simplified I guess.
>>>>>>>>>> >> Well this will need to be done at the SoC specific implementation level (bcm<xxx>_nand.c) and each SoC will need to have either general data bus read func with is_param option or data_bus_read_page, data_bus_read_param.
>>>>>>>>
>>>>>>>> You told me in case we would use exec_op we could avoid the param
>>>>>>>> cache. If that's true then the whole support can be simplified.
>>>>>>>> >> Correct we may possibly unified the parameter data read but exec_op is long shot and we are not fully ready for that yet. It also depends on if the low level data register has endianess difference for the parameter data between difference SoCs.
>>>>>>>
>>>>>>> So I would like to push the current implementation and we can explore the exec_op option late which will be a much big and complete different implementation.
>>>>>>
>>>>>> I am sorry but this series is totally backwards, you're trying to guess
>>>>>> what comes next with the 'is_param' thing, it's exactly what we are
>>>>>> fighting against since 2017. There are plenty of ->exec_op()
>>>>>> conversions out there, I don't believe this one will be harder. You
>>>>>> need to convert the driver to this new API and get rid of this whole
>>>>>> endianness non-sense to simplify a lot the driver.
>>>>>> >>> I am not guessing anything but just factor out the existing common nand cache read logic into the single default function(or one for page read and another for parameter read as I mentioned in another thread) and allow SoC to overrides the implementation when needed.
>>>>
>>>> No, you are trying to guess what type of read the core is performing,
>>>> either a regular data page read or a parameter page read.
>>>>
>>>>> I agree ->exec_op can possibly get rid of the parameter page read function and is the way to go. But it won't help on the page read for endianess.
>>>>
>>>> You told me there is no endianess issue with the data pages, so why it
>>>> won't help on the page read?
>>>>
>>>>> It's not that I am against exec_op but I want to take one step a time
>>>>> and I'd like to get these fixes
>>>>
>>>> I don't see any fix here? Let me know if I am missing something but
>>>> right now I see a new version of the controller being supported with
>>>> its own constraints. If you are fixing existing code for already
>>>> supported platform, then make it clear and we can discuss this. But if
>>>> you just want to support the bcmbca flavor, then there is no risk
>>>> mitigation involved here, and a conversion is the right step :)
>>>>
>>>
>>> I forgot to mention: the exec_op conversion is almost ready, Boris
>>> worked on it but he lacked the hardware so maybe you'll just need to
>>> revive the few patches which target your platform and do a little bit of
>>> debugging?
>>>
>>> https://github.com/bbrezillon/linux/commits/nand/exec-op-conversion?after=8a3cf6fd25d5e15c6667f9e95c1fc86e4cb735e6+34&branch=nand%2Fexec-op-conversion&qualified_name=refs%2Fheads%2Fnand%2Fexec-op-conversion
>>>
>> Yes this is the patch what our exec_op work is based on. Thanks Boris! The issue with patch is that performance is very slow for anything that rely on nand_read_page_op as the patch implementing it using the low level cmd and data register to transfer the data byte by byte.
>
> You don't need to use exec_op for your read_page/write_page hooks,
> quite the opposite actually. exec_op is not meant for high throughput.
> exec_op is meant to be simple. You can have fast I/Os with a different
> mechanism in your read/write_page hooks.
>
Right, it does not impact our fast path: controller-based ECC read/write.
But things like the on-chip ECC NAND driver that uses the exec_op API get
impacted badly. We need to add a NAND op parser, several matching rules
and other logic to use the fast-path page read/write instead of the
low-level data register read/write.
>> I actually sent out email regarding this to Boris and he cc'ed you in
>> sept last year. We have to use the nand parser to match the page read
>> from exec_op so we can actually match and use the brcmnand_page_read
>> fast path. But there are many situations that we need to match so the
>> project to migrate exce_op are still work in progress just on our
>> bcmbca chip as of now. Just forward that email again to you and I
>> appreciate it if you have any inputs there. So IMHO it is just too
>> risky and too big of scope to have the exec_op added to this patch
>> series and definitively better to do it afterwards with a dedicated
>> patch.
>
> As long as you add small and orthogonal changes to cmd_ctrl/cmd_func
> I don't mind, but what you want now is to force me to pull dirty
> changes "first", the type of change we are refusing since 2018, making
> me expect you'll perform the conversion after. It would have been
> terribly less dirty and you would have all your code already upstreamed
> if you had performed the exec_op conversion since September.
>
I didn't work on open source 5 years ago. I am sorry that I missed the
background of the changes rejected since then, but I do not agree that
this change is a dirty change just because I factor out the code with an
is_param argument (and I offered an alternative that removes is_param by
using two data read functions).
I see your point with exec_op and agree that is the way to go. We had
an initial look at Boris's exec_op patch last Sept and noticed the
performance issue, but we haven't had the chance to actively work on
improving the performance and preparing it for upstreaming until recently.
What if we bring in the original exec_op patch in this series so we
don't need to add the parameter data read function (if we verify it works
on different SoCs without endianness issues)? Or is it better to have
exec_op as a separate patch first and then this series? Then we provide
another patch to improve the performance for exec_op, as this work is
still in progress and requires more testing.
> Thanks,
> Miquèl
>
Hi William,
[email protected] wrote on Tue, 13 Jun 2023 17:00:19 -0700:
> Hi Miquel,
>
> On 06/12/2023 11:42 PM, Miquel Raynal wrote:
> > Hi William,
> >
> > [email protected] wrote on Mon, 12 Jun 2023 12:18:58 -0700:
> >
> >> On 06/12/2023 10:53 AM, Miquel Raynal wrote:
> >>> Hello again,
> >>> >>>>>>>>>> Perhaps we could have a single function that is statically assigned at
> >>>>>>>>>> probe time instead of a first helper with two conditions which calls in
> >>>>>>>>>> one case another hook... This can be simplified I guess.
> >>>>>>>>>> >> Well this will need to be done at the SoC specific implementation level (bcm<xxx>_nand.c) and each SoC will need to have either general data bus read func with is_param option or data_bus_read_page, data_bus_read_param.
> >>>>>>>>
> >>>>>>>> You told me in case we would use exec_op we could avoid the param
> >>>>>>>> cache. If that's true then the whole support can be simplified.
> >>>>>>>> >> Correct we may possibly unified the parameter data read but exec_op is long shot and we are not fully ready for that yet. It also depends on if the low level data register has endianess difference for the parameter data between difference SoCs.
> >>>>>>>
> >>>>>>> So I would like to push the current implementation and we can explore the exec_op option late which will be a much big and complete different implementation.
> >>>>>>
> >>>>>> I am sorry but this series is totally backwards, you're trying to guess
> >>>>>> what comes next with the 'is_param' thing, it's exactly what we are
> >>>>>> fighting against since 2017. There are plenty of ->exec_op()
> >>>>>> conversions out there, I don't believe this one will be harder. You
> >>>>>> need to convert the driver to this new API and get rid of this whole
> >>>>>> endianness non-sense to simplify a lot the driver.
> >>>>>> >>> I am not guessing anything but just factor out the existing common nand cache read logic into the single default function(or one for page read and another for parameter read as I mentioned in another thread) and allow SoC to overrides the implementation when needed.
> >>>>
> >>>> No, you are trying to guess what type of read the core is performing,
> >>>> either a regular data page read or a parameter page read.
> >>>> >>>>> I agree ->exec_op can possibly get rid of the parameter page read function and is the way to go. But it won't help on the page read for endianess.
> >>>>
> >>>> You told me there is no endianess issue with the data pages, so why it
> >>>> won't help on the page read?
> >>>> >>>>> It's not that I am against exec_op but I want to take one step a time
> >>>>> and I'd like to get these fixes
> >>>>
> >>>> I don't see any fix here? Let me know if I am missing something but
> >>>> right now I see a new version of the controller being supported with
> >>>> its own constraints. If you are fixing existing code for already
> >>>> supported platform, then make it clear and we can discuss this. But if
> >>>> you just want to support the bcmbca flavor, then there is no risk
> >>>> mitigation involved here, and a conversion is the right step :)
> >>>> >>>
> >>> I forgot to mention: the exec_op conversion is almost ready, Boris
> >>> worked on it but he lacked the hardware so maybe you'll just need to
> >>> revive the few patches which target your platform and do a little bit of
> >>> debugging?
> >>>
> >>> https://github.com/bbrezillon/linux/commits/nand/exec-op-conversion?after=8a3cf6fd25d5e15c6667f9e95c1fc86e4cb735e6+34&branch=nand%2Fexec-op-conversion&qualified_name=refs%2Fheads%2Fnand%2Fexec-op-conversion
> >>> >> Yes this is the patch what our exec_op work is based on. Thanks Boris! The issue with patch is that performance is very slow for anything that rely on nand_read_page_op as the patch implementing it using the low level cmd and data register to transfer the data byte by byte.
> >
> > You don't need to use exec_op for your read_page/write_page hooks,
> > quite the opposite actually. exec_op is not meant for high throughput.
> > exec_op is meant to be simple. You can have fast I/Os with a different
> > mechanism in your read/write_page hooks.
> >
> Right it does not impact our fast path: controller based ecc read/write. But things like on-chip ecc nand driver that uses exec_op API get impacted badly. We need to add nand op parser, several matching rules and other logics to use fast path page read/write instead of the low level data register read/write.
>
> >> I actually sent out email regarding this to Boris and he cc'ed you in
> >> sept last year. We have to use the nand parser to match the page read
> >> from exec_op so we can actually match and use the brcmnand_page_read
> >> fast path. But there are many situations that we need to match so the
> >> project to migrate exce_op are still work in progress just on our
> >> bcmbca chip as of now. Just forward that email again to you and I
> >> appreciate it if you have any inputs there. So IMHO it is just too
> >> risky and too big of scope to have the exec_op added to this patch
> >> series and definitively better to do it afterwards with a dedicated
> >> patch.
> >
> > As long as you add small and orthogonal changes to cmd_ctrl/cmd_func
> > I don't mind, but what you want now is to force me to pull dirty
> > changes "first", the type of change we are refusing since 2018, making
> > me expect you'll perform the conversion after. It would have been
> > terribly less dirty and you would have all your code already upstreamed
> > if you had performed the exec_op conversion since September.
> >
> I didn't work on open source 5 years ago. I am sorry that I missed the background of the rejected changes since then but I do not agree that this change is dirty change just because I factor out the code with is_param argument(and I offered an alternative to remove is_param with two data read functions).
This _is_ dirty because you cannot know with the cmd_ctrl/cmdfunc
API whether we read a parameter page or a page of data. So you are
_guessing_. There are plenty of ways of reading one or the other; the
heuristics on the controller side will _always_ be wrong. That is why
exec_op() was introduced.
> I see your point with exec_op and agree that is the way to go. We had an initial look of the Borris exec_op patch last Sept and noticed the performance issue but we haven't got the chance to actively work on improving the performance and prepare for up-streaming until recently. What if we bring in the original exec_op patch in this series so we don't need to add the parameter data read function(if we verify it works on difference SoCs without endianess)? Or better to have exec_op as separate patch first and then this series?
This one is my favorite:
1/ Add exec_op support
2/ Remove legacy hooks
3/ Add support for the bcmbca SoC
Then you can improve the performance for on-die ECC situations, but to
be honest this improvement looks like a very small addition. You can
take example from the existing hooks, how they match specific
operations in the parser and then hook them to specific helpers.
Nothing terribly complex, there are dozens of conversions available
now.
Good luck :)
Miquèl
On 06/13/2023 11:22 PM, Miquel Raynal wrote:
> Hi William,
>
> [email protected] wrote on Tue, 13 Jun 2023 17:00:19 -0700:
>
>> Hi Miquel,
>>
>> On 06/12/2023 11:42 PM, Miquel Raynal wrote:
>>> Hi William,
>>>
>>> [email protected] wrote on Mon, 12 Jun 2023 12:18:58 -0700:
>>>
>>>> On 06/12/2023 10:53 AM, Miquel Raynal wrote:
>>>>> Hello again,
>>>>> >>>>>>>>>> Perhaps we could have a single function that is statically assigned at
>>>>>>>>>>>> probe time instead of a first helper with two conditions which calls in
>>>>>>>>>>>> one case another hook... This can be simplified I guess.
>>>>>>>>>>>> >> Well this will need to be done at the SoC specific implementation level (bcm<xxx>_nand.c) and each SoC will need to have either general data bus read func with is_param option or data_bus_read_page, data_bus_read_param.
>>>>>>>>>>
>>>>>>>>>> You told me in case we would use exec_op we could avoid the param
>>>>>>>>>> cache. If that's true then the whole support can be simplified.
>>>>>>>>>> >> Correct we may possibly unified the parameter data read but exec_op is long shot and we are not fully ready for that yet. It also depends on if the low level data register has endianess difference for the parameter data between difference SoCs.
>>>>>>>>>
>>>>>>>>> So I would like to push the current implementation and we can explore the exec_op option late which will be a much big and complete different implementation.
>>>>>>>>
>>>>>>>> I am sorry but this series is totally backwards, you're trying to guess
>>>>>>>> what comes next with the 'is_param' thing, it's exactly what we are
>>>>>>>> fighting against since 2017. There are plenty of ->exec_op()
>>>>>>>> conversions out there, I don't believe this one will be harder. You
>>>>>>>> need to convert the driver to this new API and get rid of this whole
>>>>>>>> endianness non-sense to simplify a lot the driver.
>>>>>>>> >>> I am not guessing anything but just factor out the existing common nand cache read logic into the single default function(or one for page read and another for parameter read as I mentioned in another thread) and allow SoC to overrides the implementation when needed.
>>>>>>
>>>>>> No, you are trying to guess what type of read the core is performing,
>>>>>> either a regular data page read or a parameter page read.
>>>>>> >>>>> I agree ->exec_op can possibly get rid of the parameter page read function and is the way to go. But it won't help on the page read for endianess.
>>>>>>
>>>>>> You told me there is no endianess issue with the data pages, so why it
>>>>>> won't help on the page read?
>>>>>> >>>>> It's not that I am against exec_op but I want to take one step a time
>>>>>>> and I'd like to get these fixes
>>>>>>
>>>>>> I don't see any fix here? Let me know if I am missing something but
>>>>>> right now I see a new version of the controller being supported with
>>>>>> its own constraints. If you are fixing existing code for already
>>>>>> supported platform, then make it clear and we can discuss this. But if
>>>>>> you just want to support the bcmbca flavor, then there is no risk
>>>>>> mitigation involved here, and a conversion is the right step :)
>>>>>> >>>
>>>>> I forgot to mention: the exec_op conversion is almost ready, Boris
>>>>> worked on it but he lacked the hardware so maybe you'll just need to
>>>>> revive the few patches which target your platform and do a little bit of
>>>>> debugging?
>>>>>
>>>>> https://github.com/bbrezillon/linux/commits/nand/exec-op-conversion?after=8a3cf6fd25d5e15c6667f9e95c1fc86e4cb735e6+34&branch=nand%2Fexec-op-conversion&qualified_name=refs%2Fheads%2Fnand%2Fexec-op-conversion
>>>>> >> Yes this is the patch what our exec_op work is based on. Thanks Boris! The issue with patch is that performance is very slow for anything that rely on nand_read_page_op as the patch implementing it using the low level cmd and data register to transfer the data byte by byte.
>>>
>>> You don't need to use exec_op for your read_page/write_page hooks,
>>> quite the opposite actually. exec_op is not meant for high throughput.
>>> exec_op is meant to be simple. You can have fast I/Os with a different
>>> mechanism in your read/write_page hooks.
>>>
>> Right it does not impact our fast path: controller based ecc read/write. But things like on-chip ecc nand driver that uses exec_op API get impacted badly. We need to add nand op parser, several matching rules and other logics to use fast path page read/write instead of the low level data register read/write.
>>
>>>> I actually sent out email regarding this to Boris and he cc'ed you in
>>>> sept last year. We have to use the nand parser to match the page read
>>>> from exec_op so we can actually match and use the brcmnand_page_read
>>>> fast path. But there are many situations that we need to match so the
>>>> project to migrate exce_op are still work in progress just on our
>>>> bcmbca chip as of now. Just forward that email again to you and I
>>>> appreciate it if you have any inputs there. So IMHO it is just too
>>>> risky and too big of scope to have the exec_op added to this patch
>>>> series and definitively better to do it afterwards with a dedicated
>>>> patch.
>>>
>>> As long as you add small and orthogonal changes to cmd_ctrl/cmd_func
>>> I don't mind, but what you want now is to force me to pull dirty
>>> changes "first", the type of change we are refusing since 2018, making
>>> me expect you'll perform the conversion after. It would have been
>>> terribly less dirty and you would have all your code already upstreamed
>>> if you had performed the exec_op conversion since September.
>>>
>> I didn't work on open source 5 years ago. I am sorry that I missed the background of the rejected changes since then but I do not agree that this change is dirty change just because I factor out the code with is_param argument(and I offered an alternative to remove is_param with two data read functions).
>
> This _is_ dirty because you cannot know with the cmd_ctrl/cmdfunc
> API whether we read a parameter page or a page of data. So your are
> _guessing_. There are plenty ways of reading one of the others, the
> heuristics on the controller side will _always_ be wrong. That is why
> exec_op() was introduced.
>
Alright, we have different definitions of dirty ;) I understand it is not
the preferred way to update the code in the controller cmdfunc path,
especially for a large change that can be done in exec_op.
>> I see your point with exec_op and agree that is the way to go. We had an initial look of the Borris exec_op patch last Sept and noticed the performance issue but we haven't got the chance to actively work on improving the performance and prepare for up-streaming until recently. What if we bring in the original exec_op patch in this series so we don't need to add the parameter data read function(if we verify it works on difference SoCs without endianess)? Or better to have exec_op as separate patch first and then this series?
>
> This one is my favorite:
> 1/ Add exec_op support
> 2/ Remove legacy hooks
> 3/ Add support for the bcmbca SoC
>
Sounds good. We will send an exec_op series for 1 and 2, then another
series for 3. And I will send a v2 of this series to just include the
fixes (patch 1 to patch 4) with updates based on the comments received.
> Then you can improve the performance for on-die ECC situations, but to
> be honest this improvement looks little a very little addition. You can
> take example from the existing hooks, how they match specific
> operations in the parser and then hook them to specific helpers.
> Nothing terribly complex, there are dozens of conversions available
> now.
>
> Good luck :)
> Miquèl
>