2023-03-17 11:36:33

by Álvaro Fernández Rojas

[permalink] [raw]
Subject: [PATCH 0/3] net: dsa: b53: mmap: add MDIO Mux bus controller

B53 MMAP devices have a MDIO Mux bus controller that must be registered after
properly initializing the switch.
If the MDIO Mux controller is registered from a separate driver and the device
has an external switch present, it will cause a race condition which will hang
the device.

Álvaro Fernández Rojas (3):
dt-bindings: net: move bcm6368-mdio-mux bindings to b53
net: dsa: b53: mmap: register MDIO Mux bus controller
net: mdio: remove BCM6368 MDIO mux bus driver

.../bindings/net/brcm,bcm6368-mdio-mux.yaml | 52 -----
.../devicetree/bindings/net/dsa/brcm,b53.yaml | 131 +++++++++++++
drivers/net/dsa/b53/Kconfig | 1 +
drivers/net/dsa/b53/b53_mmap.c | 127 +++++++++++-
drivers/net/mdio/Kconfig | 11 --
drivers/net/mdio/Makefile | 1 -
drivers/net/mdio/mdio-mux-bcm6368.c | 184 ------------------
7 files changed, 258 insertions(+), 249 deletions(-)
delete mode 100644 Documentation/devicetree/bindings/net/brcm,bcm6368-mdio-mux.yaml
delete mode 100644 drivers/net/mdio/mdio-mux-bcm6368.c

--
2.30.2



2023-03-17 11:36:43

by Álvaro Fernández Rojas

[permalink] [raw]
Subject: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

b53 MMAP devices have a MDIO Mux bus controller that must be registered after
properly initializing the switch. If the MDIO Mux controller is registered
from a separate driver and the device has an external switch present, it will
cause a race condition which will hang the device.

Signed-off-by: Álvaro Fernández Rojas <[email protected]>
---
drivers/net/dsa/b53/Kconfig | 1 +
drivers/net/dsa/b53/b53_mmap.c | 127 ++++++++++++++++++++++++++++++++-
2 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/b53/Kconfig b/drivers/net/dsa/b53/Kconfig
index ebaa4a80d544..04450ee1ba82 100644
--- a/drivers/net/dsa/b53/Kconfig
+++ b/drivers/net/dsa/b53/Kconfig
@@ -26,6 +26,7 @@ config B53_MDIO_DRIVER
config B53_MMAP_DRIVER
tristate "B53 MMAP connected switch driver"
depends on B53 && HAS_IOMEM
+ select MDIO_BUS_MUX
default BCM63XX || BMIPS_GENERIC
help
Select to enable support for memory-mapped switches like the BCM63XX
diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c
index e968322dfbf0..44becbb12bb5 100644
--- a/drivers/net/dsa/b53/b53_mmap.c
+++ b/drivers/net/dsa/b53/b53_mmap.c
@@ -18,15 +18,31 @@

#include <linux/bits.h>
#include <linux/kernel.h>
+#include <linux/mdio-mux.h>
#include <linux/module.h>
#include <linux/io.h>
+#include <linux/of_mdio.h>
#include <linux/platform_device.h>
#include <linux/platform_data/b53.h>

#include "b53_priv.h"

+#define REG_MDIOC 0xb0
+#define REG_MDIOC_EXT_MASK BIT(16)
+#define REG_MDIOC_REG_SHIFT 20
+#define REG_MDIOC_PHYID_SHIFT 25
+#define REG_MDIOC_RD_MASK BIT(30)
+#define REG_MDIOC_WR_MASK BIT(31)
+
+#define REG_MDIOD 0xb4
+
struct b53_mmap_priv {
void __iomem *regs;
+
+ /* Internal MDIO Mux bus */
+ struct mii_bus *mbus;
+ int ext_phy;
+ void *mux_handle;
};

static int b53_mmap_read8(struct b53_device *dev, u8 page, u8 reg, u8 *val)
@@ -229,6 +245,111 @@ static const struct b53_io_ops b53_mmap_ops = {
.write64 = b53_mmap_write64,
};

+static int b53_mmap_mdiomux_read(struct mii_bus *bus, int phy_id, int loc)
+{
+ struct b53_device *dev = bus->priv;
+ struct b53_mmap_priv *priv = dev->priv;
+ uint32_t reg;
+ uint16_t val;
+
+ b53_mmap_write32(dev, 0, REG_MDIOC, 0);
+
+ reg = REG_MDIOC_RD_MASK |
+ (phy_id << REG_MDIOC_PHYID_SHIFT) |
+ (loc << REG_MDIOC_REG_SHIFT);
+ if (priv->ext_phy)
+ reg |= REG_MDIOC_EXT_MASK;
+
+ b53_mmap_write32(dev, 0, REG_MDIOC, reg);
+ udelay(50);
+ b53_mmap_read16(dev, 0, REG_MDIOD, &val);
+
+ return (int) val;
+}
+
+static int b53_mmap_mdiomux_write(struct mii_bus *bus, int phy_id, int loc,
+ uint16_t val)
+{
+ struct b53_device *dev = bus->priv;
+ struct b53_mmap_priv *priv = dev->priv;
+ uint32_t reg;
+
+ b53_mmap_write32(dev, 0, REG_MDIOC, 0);
+
+ reg = REG_MDIOC_WR_MASK |
+ (phy_id << REG_MDIOC_PHYID_SHIFT) |
+ (loc << REG_MDIOC_REG_SHIFT);
+ if (priv->ext_phy)
+ reg |= REG_MDIOC_EXT_MASK;
+ reg |= val;
+
+ b53_mmap_write32(dev, 0, REG_MDIOC, reg);
+ udelay(50);
+
+ return 0;
+}
+
+static int b53_mmap_mdiomux_switch_fn(int current_child, int desired_child,
+ void *data)
+{
+ struct b53_device *dev = data;
+ struct b53_mmap_priv *priv = dev->priv;
+
+ priv->ext_phy = desired_child;
+
+ return 0;
+}
+
+static int b53_mmap_mdiomux_init(struct b53_device *priv)
+{
+ struct b53_mmap_priv *mpriv = priv->priv;
+ struct device *dev = priv->dev;
+ struct device_node *np = dev->of_node;
+ struct device_node *mnp;
+ struct mii_bus *mbus;
+ int ret;
+
+ mnp = of_get_child_by_name(np, "mdio-mux");
+ if (!mnp)
+ return 0;
+
+ mbus = devm_mdiobus_alloc(dev);
+ if (!mbus) {
+ of_node_put(mnp);
+ return -ENOMEM;
+ }
+
+ mbus->priv = priv;
+ mbus->name = np->full_name;
+ snprintf(mbus->id, MII_BUS_ID_SIZE, "%pOF", np);
+ mbus->parent = dev;
+ mbus->read = b53_mmap_mdiomux_read;
+ mbus->write = b53_mmap_mdiomux_write;
+ mbus->phy_mask = 0x3f;
+
+ ret = devm_of_mdiobus_register(dev, mbus, mnp);
+ if (ret) {
+ of_node_put(mnp);
+ dev_err(dev, "MDIO mux registration failed\n");
+ return ret;
+ }
+
+ ret = mdio_mux_init(dev, mnp, b53_mmap_mdiomux_switch_fn,
+ &mpriv->mux_handle, priv, mbus);
+ of_node_put(mnp);
+ if (ret) {
+ mdiobus_unregister(mbus);
+ dev_err(dev, "MDIO mux initialization failed\n");
+ return ret;
+ }
+
+ dev_info(dev, "MDIO mux bus init\n");
+
+ mpriv->mbus = mbus;
+
+ return 0;
+}
+
static int b53_mmap_probe_of(struct platform_device *pdev,
struct b53_platform_data **ppdata)
{
@@ -306,7 +427,11 @@ static int b53_mmap_probe(struct platform_device *pdev)

platform_set_drvdata(pdev, dev);

- return b53_switch_register(dev);
+ ret = b53_switch_register(dev);
+ if (ret)
+ return ret;
+
+ return b53_mmap_mdiomux_init(dev);
}

static int b53_mmap_remove(struct platform_device *pdev)
--
2.30.2


2023-03-17 11:36:47

by Álvaro Fernández Rojas

[permalink] [raw]
Subject: [PATCH 1/3] dt-bindings: net: move bcm6368-mdio-mux bindings to b53

b53 MMAP devices have a MDIO Mux bus controller that must be registered after
properly initializing the switch. If the MDIO Mux controller is registered
from a separate driver and the device has an external switch present, it will
cause a race condition which will hang the device.

Signed-off-by: Álvaro Fernández Rojas <[email protected]>
---
.../bindings/net/brcm,bcm6368-mdio-mux.yaml | 52 -------
.../devicetree/bindings/net/dsa/brcm,b53.yaml | 131 ++++++++++++++++++
2 files changed, 131 insertions(+), 52 deletions(-)
delete mode 100644 Documentation/devicetree/bindings/net/brcm,bcm6368-mdio-mux.yaml

diff --git a/Documentation/devicetree/bindings/net/brcm,bcm6368-mdio-mux.yaml b/Documentation/devicetree/bindings/net/brcm,bcm6368-mdio-mux.yaml
deleted file mode 100644
index 9ef28c2a0afc..000000000000
--- a/Documentation/devicetree/bindings/net/brcm,bcm6368-mdio-mux.yaml
+++ /dev/null
@@ -1,52 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
-%YAML 1.2
----
-$id: http://devicetree.org/schemas/net/brcm,bcm6368-mdio-mux.yaml#
-$schema: http://devicetree.org/meta-schemas/core.yaml#
-
-title: Broadcom BCM6368 MDIO bus multiplexer
-
-maintainers:
- - Álvaro Fernández Rojas <[email protected]>
-
-description:
- This MDIO bus multiplexer defines buses that could be internal as well as
- external to SoCs. When child bus is selected, one needs to select these two
- properties as well to generate desired MDIO transaction on appropriate bus.
-
-allOf:
- - $ref: mdio-mux.yaml#
-
-properties:
- compatible:
- const: brcm,bcm6368-mdio-mux
-
- reg:
- maxItems: 1
-
-required:
- - compatible
- - reg
-
-unevaluatedProperties: false
-
-examples:
- - |
- mdio0: mdio@10e000b0 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "brcm,bcm6368-mdio-mux";
- reg = <0x10e000b0 0x6>;
-
- mdio_int: mdio@0 {
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0>;
- };
-
- mdio_ext: mdio@1 {
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <1>;
- };
- };
diff --git a/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml b/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
index 5bef4128d175..b1a894899306 100644
--- a/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
@@ -61,6 +61,17 @@ properties:
- brcm,bcm6368-switch
- const: brcm,bcm63xx-switch

+ big-endian:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Set this flag for switches with big endian registers.
+
+ mdio-mux:
+ $ref: /schemas/net/mdio-mux.yaml
+ description:
+ MDIO bus multiplexer defines buses that could be internal as well as
+ external to SoCs.
+
required:
- compatible
- reg
@@ -131,6 +142,22 @@ allOf:
reg:
maxItems: 1

+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - brcm,bcm3384-switch
+ - brcm,bcm6328-switch
+ - brcm,bcm6368-switch
+ then:
+ properties:
+ reg:
+ minItems: 1
+ maxItems: 1
+ required:
+ - mdio-mux
+
unevaluatedProperties: false

examples:
@@ -262,3 +289,107 @@ examples:
};
};
};
+ - |
+ switch0: switch@10f00000 {
+ compatible = "brcm,bcm6368-switch", "brcm,bcm63xx-switch";
+ reg = <0x10f00000 0x8000>;
+ big-endian;
+
+ dsa,member = <0 0>;
+
+ mdio-mux {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ mdio@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ mdio@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ switch@1e {
+ compatible = "brcm,bcm53125";
+ reg = <30>;
+
+ dsa,member = <1 0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ label = "lan1";
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "lan2";
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "lan3";
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "lan4";
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "wan";
+ };
+
+ port@8 {
+ reg = <8>;
+ label = "cpu";
+
+ phy-mode = "rgmii";
+ ethernet = <&switch0port4>;
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+ };
+ };
+ };
+ };
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switch0port4: port@4 {
+ reg = <4>;
+ label = "extsw";
+
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+
+ port@8 {
+ reg = <8>;
+
+ phy-mode = "internal";
+ ethernet = <&ethernet>;
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+ };
+ };
--
2.30.2


2023-03-17 11:36:49

by Álvaro Fernández Rojas

[permalink] [raw]
Subject: [PATCH 3/3] net: mdio: remove BCM6368 MDIO mux bus driver

This driver is now registered from DSA B53 MMAP switches.

Signed-off-by: Álvaro Fernández Rojas <[email protected]>
---
drivers/net/mdio/Kconfig | 11 --
drivers/net/mdio/Makefile | 1 -
drivers/net/mdio/mdio-mux-bcm6368.c | 184 ----------------------------
3 files changed, 196 deletions(-)
delete mode 100644 drivers/net/mdio/mdio-mux-bcm6368.c

diff --git a/drivers/net/mdio/Kconfig b/drivers/net/mdio/Kconfig
index 90309980686e..2891d63a942f 100644
--- a/drivers/net/mdio/Kconfig
+++ b/drivers/net/mdio/Kconfig
@@ -226,17 +226,6 @@ config MDIO_BUS_MUX_MESON_GXL
the amlogic GXL SoC. The multiplexer connects either the external
or the internal MDIO bus to the parent bus.

-config MDIO_BUS_MUX_BCM6368
- tristate "Broadcom BCM6368 MDIO bus multiplexers"
- depends on OF && OF_MDIO && (BMIPS_GENERIC || COMPILE_TEST)
- select MDIO_BUS_MUX
- default BMIPS_GENERIC
- help
- This module provides a driver for MDIO bus multiplexers found in
- BCM6368 based Broadcom SoCs. This multiplexer connects one of several
- child MDIO bus to a parent bus. Buses could be internal as well as
- external and selection logic lies inside the same multiplexer.
-
config MDIO_BUS_MUX_BCM_IPROC
tristate "Broadcom iProc based MDIO bus multiplexers"
depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST)
diff --git a/drivers/net/mdio/Makefile b/drivers/net/mdio/Makefile
index 7d4cb4c11e4e..06cbb64b88fc 100644
--- a/drivers/net/mdio/Makefile
+++ b/drivers/net/mdio/Makefile
@@ -24,7 +24,6 @@ obj-$(CONFIG_MDIO_THUNDER) += mdio-thunder.o
obj-$(CONFIG_MDIO_XGENE) += mdio-xgene.o

obj-$(CONFIG_MDIO_BUS_MUX) += mdio-mux.o
-obj-$(CONFIG_MDIO_BUS_MUX_BCM6368) += mdio-mux-bcm6368.o
obj-$(CONFIG_MDIO_BUS_MUX_BCM_IPROC) += mdio-mux-bcm-iproc.o
obj-$(CONFIG_MDIO_BUS_MUX_GPIO) += mdio-mux-gpio.o
obj-$(CONFIG_MDIO_BUS_MUX_MESON_G12A) += mdio-mux-meson-g12a.o
diff --git a/drivers/net/mdio/mdio-mux-bcm6368.c b/drivers/net/mdio/mdio-mux-bcm6368.c
deleted file mode 100644
index 8b444a8eb6b5..000000000000
--- a/drivers/net/mdio/mdio-mux-bcm6368.c
+++ /dev/null
@@ -1,184 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Broadcom BCM6368 mdiomux bus controller driver
- *
- * Copyright (C) 2021 Álvaro Fernández Rojas <[email protected]>
- */
-
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/mdio-mux.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_platform.h>
-#include <linux/of_mdio.h>
-#include <linux/phy.h>
-#include <linux/platform_device.h>
-#include <linux/sched.h>
-
-#define MDIOC_REG 0x0
-#define MDIOC_EXT_MASK BIT(16)
-#define MDIOC_REG_SHIFT 20
-#define MDIOC_PHYID_SHIFT 25
-#define MDIOC_RD_MASK BIT(30)
-#define MDIOC_WR_MASK BIT(31)
-
-#define MDIOD_REG 0x4
-
-struct bcm6368_mdiomux_desc {
- void *mux_handle;
- void __iomem *base;
- struct device *dev;
- struct mii_bus *mii_bus;
- int ext_phy;
-};
-
-static int bcm6368_mdiomux_read(struct mii_bus *bus, int phy_id, int loc)
-{
- struct bcm6368_mdiomux_desc *md = bus->priv;
- uint32_t reg;
- int ret;
-
- __raw_writel(0, md->base + MDIOC_REG);
-
- reg = MDIOC_RD_MASK |
- (phy_id << MDIOC_PHYID_SHIFT) |
- (loc << MDIOC_REG_SHIFT);
- if (md->ext_phy)
- reg |= MDIOC_EXT_MASK;
-
- __raw_writel(reg, md->base + MDIOC_REG);
- udelay(50);
- ret = __raw_readw(md->base + MDIOD_REG);
-
- return ret;
-}
-
-static int bcm6368_mdiomux_write(struct mii_bus *bus, int phy_id, int loc,
- uint16_t val)
-{
- struct bcm6368_mdiomux_desc *md = bus->priv;
- uint32_t reg;
-
- __raw_writel(0, md->base + MDIOC_REG);
-
- reg = MDIOC_WR_MASK |
- (phy_id << MDIOC_PHYID_SHIFT) |
- (loc << MDIOC_REG_SHIFT);
- if (md->ext_phy)
- reg |= MDIOC_EXT_MASK;
- reg |= val;
-
- __raw_writel(reg, md->base + MDIOC_REG);
- udelay(50);
-
- return 0;
-}
-
-static int bcm6368_mdiomux_switch_fn(int current_child, int desired_child,
- void *data)
-{
- struct bcm6368_mdiomux_desc *md = data;
-
- md->ext_phy = desired_child;
-
- return 0;
-}
-
-static int bcm6368_mdiomux_probe(struct platform_device *pdev)
-{
- struct bcm6368_mdiomux_desc *md;
- struct mii_bus *bus;
- struct resource *res;
- int rc;
-
- md = devm_kzalloc(&pdev->dev, sizeof(*md), GFP_KERNEL);
- if (!md)
- return -ENOMEM;
- md->dev = &pdev->dev;
-
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res)
- return -EINVAL;
-
- /*
- * Just ioremap, as this MDIO block is usually integrated into an
- * Ethernet MAC controller register range
- */
- md->base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
- if (!md->base) {
- dev_err(&pdev->dev, "failed to ioremap register\n");
- return -ENOMEM;
- }
-
- md->mii_bus = devm_mdiobus_alloc(&pdev->dev);
- if (!md->mii_bus) {
- dev_err(&pdev->dev, "mdiomux bus alloc failed\n");
- return -ENOMEM;
- }
-
- bus = md->mii_bus;
- bus->priv = md;
- bus->name = "BCM6368 MDIO mux bus";
- snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d", pdev->name, pdev->id);
- bus->parent = &pdev->dev;
- bus->read = bcm6368_mdiomux_read;
- bus->write = bcm6368_mdiomux_write;
- bus->phy_mask = 0x3f;
- bus->dev.of_node = pdev->dev.of_node;
-
- rc = mdiobus_register(bus);
- if (rc) {
- dev_err(&pdev->dev, "mdiomux registration failed\n");
- return rc;
- }
-
- platform_set_drvdata(pdev, md);
-
- rc = mdio_mux_init(md->dev, md->dev->of_node,
- bcm6368_mdiomux_switch_fn, &md->mux_handle, md,
- md->mii_bus);
- if (rc) {
- dev_info(md->dev, "mdiomux initialization failed\n");
- goto out_register;
- }
-
- dev_info(&pdev->dev, "Broadcom BCM6368 MDIO mux bus\n");
-
- return 0;
-
-out_register:
- mdiobus_unregister(bus);
- return rc;
-}
-
-static int bcm6368_mdiomux_remove(struct platform_device *pdev)
-{
- struct bcm6368_mdiomux_desc *md = platform_get_drvdata(pdev);
-
- mdio_mux_uninit(md->mux_handle);
- mdiobus_unregister(md->mii_bus);
-
- return 0;
-}
-
-static const struct of_device_id bcm6368_mdiomux_ids[] = {
- { .compatible = "brcm,bcm6368-mdio-mux", },
- { /* sentinel */ }
-};
-MODULE_DEVICE_TABLE(of, bcm6368_mdiomux_ids);
-
-static struct platform_driver bcm6368_mdiomux_driver = {
- .driver = {
- .name = "bcm6368-mdio-mux",
- .of_match_table = bcm6368_mdiomux_ids,
- },
- .probe = bcm6368_mdiomux_probe,
- .remove = bcm6368_mdiomux_remove,
-};
-module_platform_driver(bcm6368_mdiomux_driver);
-
-MODULE_AUTHOR("Álvaro Fernández Rojas <[email protected]>");
-MODULE_DESCRIPTION("BCM6368 mdiomux bus controller driver");
-MODULE_LICENSE("GPL v2");
--
2.30.2


2023-03-17 11:51:36

by Vladimir Oltean

[permalink] [raw]
Subject: Re: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

On Fri, Mar 17, 2023 at 12:34:26PM +0100, ?lvaro Fern?ndez Rojas wrote:
> b53 MMAP devices have a MDIO Mux bus controller that must be registered after
> properly initializing the switch. If the MDIO Mux controller is registered
> from a separate driver and the device has an external switch present, it will
> cause a race condition which will hang the device.

Could you describe the race in more details? Why does it hang the device?

2023-03-17 12:06:59

by Álvaro Fernández Rojas

[permalink] [raw]
Subject: Re: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

Hi Vladimir,

El vie, 17 mar 2023 a las 12:51, Vladimir Oltean (<[email protected]>) escribió:
>
> On Fri, Mar 17, 2023 at 12:34:26PM +0100, Álvaro Fernández Rojas wrote:
> > b53 MMAP devices have a MDIO Mux bus controller that must be registered after
> > properly initializing the switch. If the MDIO Mux controller is registered
> > from a separate driver and the device has an external switch present, it will
> > cause a race condition which will hang the device.
>
> Could you describe the race in more details? Why does it hang the device?

I didn't perform a full analysis on the problem, but what I think is
going on is that both b53 switches are probed and both of them fail
due to the ethernet device not being probed yet.
At some point, the internal switch is reset and not fully configured
and the external switch is probed again, but since the internal switch
isn't ready, the MDIO accesses for the external switch fail due to the
internal switch not being ready and this hangs the device because the
access to the external switch is done through the same registers from
the internal switch.

But maybe Florian or Jonas can give some more details about the issue...

--
Álvaro

2023-03-17 13:04:46

by Vladimir Oltean

[permalink] [raw]
Subject: Re: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

On Fri, Mar 17, 2023 at 01:06:43PM +0100, ?lvaro Fern?ndez Rojas wrote:
> Hi Vladimir,
>
> El vie, 17 mar 2023 a las 12:51, Vladimir Oltean (<[email protected]>) escribi?:
> >
> > On Fri, Mar 17, 2023 at 12:34:26PM +0100, ?lvaro Fern?ndez Rojas wrote:
> > > b53 MMAP devices have a MDIO Mux bus controller that must be registered after
> > > properly initializing the switch. If the MDIO Mux controller is registered
> > > from a separate driver and the device has an external switch present, it will
> > > cause a race condition which will hang the device.
> >
> > Could you describe the race in more details? Why does it hang the device?
>
> I didn't perform a full analysis on the problem, but what I think is
> going on is that both b53 switches are probed and both of them fail
> due to the ethernet device not being probed yet.
> At some point, the internal switch is reset and not fully configured
> and the external switch is probed again, but since the internal switch
> isn't ready, the MDIO accesses for the external switch fail due to the
> internal switch not being ready and this hangs the device because the
> access to the external switch is done through the same registers from
> the internal switch.

The proposed solution is too radical for a problem that was not properly
characterized yet, so this patch set has my temporary NACK.

> But maybe Florian or Jonas can give some more details about the issue...

I think you also have the tools necessary to investigate this further.
We need to know what resource belonging to the switch is it that the
MDIO mux needs. Where is the earliest place you can add the call to
b53_mmap_mdiomux_init() such that your board works reliably? Note that
b53_switch_register() indirectly calls b53_setup(). By placing this
function where you have, the entirety of b53_setup() has finished
execution, and we don't know exactly what is it from there that is
needed.

2023-03-17 14:17:37

by Álvaro Fernández Rojas

[permalink] [raw]
Subject: Re: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

Hi Vladimir

El vie, 17 mar 2023 a las 14:04, Vladimir Oltean (<[email protected]>) escribió:
>
> On Fri, Mar 17, 2023 at 01:06:43PM +0100, Álvaro Fernández Rojas wrote:
> > Hi Vladimir,
> >
> > El vie, 17 mar 2023 a las 12:51, Vladimir Oltean (<[email protected]>) escribió:
> > >
> > > On Fri, Mar 17, 2023 at 12:34:26PM +0100, Álvaro Fernández Rojas wrote:
> > > > b53 MMAP devices have a MDIO Mux bus controller that must be registered after
> > > > properly initializing the switch. If the MDIO Mux controller is registered
> > > > from a separate driver and the device has an external switch present, it will
> > > > cause a race condition which will hang the device.
> > >
> > > Could you describe the race in more details? Why does it hang the device?
> >
> > I didn't perform a full analysis on the problem, but what I think is
> > going on is that both b53 switches are probed and both of them fail
> > due to the ethernet device not being probed yet.
> > At some point, the internal switch is reset and not fully configured
> > and the external switch is probed again, but since the internal switch
> > isn't ready, the MDIO accesses for the external switch fail due to the
> > internal switch not being ready and this hangs the device because the
> > access to the external switch is done through the same registers from
> > the internal switch.
>
> The proposed solution is too radical for a problem that was not properly
> characterized yet, so this patch set has my temporary NACK.

Forgive me, but why do you consider this solution too radical?

>
> > But maybe Florian or Jonas can give some more details about the issue...
>
> I think you also have the tools necessary to investigate this further.
> We need to know what resource belonging to the switch is it that the
> MDIO mux needs. Where is the earliest place you can add the call to
> b53_mmap_mdiomux_init() such that your board works reliably? Note that
> b53_switch_register() indirectly calls b53_setup(). By placing this
> function where you have, the entirety of b53_setup() has finished
> execution, and we don't know exactly what is it from there that is
> needed.

In the following link you will find different bootlogs related to
different scenarios all of them with the same result: any attempt of
calling b53_mmap_mdiomux_init() earlier than b53_switch_register()
will either result in a kernel panic or a device hang:
https://gist.github.com/Noltari/b0bd6d5211160ac7bf349d998d21e7f7

1. before b53_switch_register():
[ 1.756010] bcm53xx 0.1:1e: found switch: BCM53125, rev 4
[ 1.761917] bcm53xx 0.1:1e: failed to register switch: -517
[ 1.767759] b53-switch 10e00000.switch: MDIO mux bus init
[ 1.774237] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
[ 1.785673] bcm6368-enetsw 1000d800.ethernet: IRQ tx not found
[ 1.795932] bcm6368-enetsw 1000d800.ethernet: mtd mac 4c:60:de:86:52:12
[ 1.884320] bcm7038-wdt 1000005c.watchdog: Registered BCM7038 Watchdog
[ 1.901957] NET: Registered PF_INET6 protocol family
[ 1.935223] Segment Routing with IPv6
[ 1.939160] In-situ OAM (IOAM) with IPv6
[ 1.943514] NET: Registered PF_PACKET protocol family
[ 1.949564] 8021q: 802.1Q VLAN Support v1.8
[ 1.987591] CPU 1 Unable to handle kernel paging request at virtual
address 00000000, epc == 804be000, ra == 804bbf3c
[ 1.998697] Oops[#1]:
[ 2.000995] CPU: 1 PID: 91 Comm: kworker/u4:3 Not tainted 5.15.98 #0
[ 2.007533] Workqueue: events_unbound deferred_probe_work_func
[ 2.013541] $ 0 : 00000000 00000001 804bdfd4 81ee6800
[ 2.018916] $ 4 : 834c7000 00000000 00000002 00000001
[ 2.024291] $ 8 : c0000000 00000110 00000114 00000000
[ 2.029668] $12 : 00000001 81cf2f8a fffffffc 00000000
[ 2.035043] $16 : 00000000 00000000 00000002 834bc680
[ 2.040420] $20 : 00000000 00000080 81c0700d 81f37a40
[ 2.045796] $24 : 00000018 00000000
[ 2.051171] $28 : 81f58000 81f59c80 80870000 804bbf3c
[ 2.056547] Hi : e6545baf
[ 2.059505] Lo : a4644567
[ 2.062462] epc : 804be000 mdio_mux_read+0x2c/0xd4
[ 2.067569] ra : 804bbf3c __mdiobus_read+0x20/0xc4
[ 2.072766] Status: 10008b03 KERNEL EXL IE
[ 2.077066] Cause : 00800008 (ExcCode 02)
[ 2.081187] BadVA : 00000000
[ 2.084145] PrId : 0002a070 (Broadcom BMIPS4350)
[ 2.088983] Modules linked in:
[ 2.092119] Process kworker/u4:3 (pid: 91, threadinfo=(ptrval),
task=(ptrval), tls=00000000)
[ 2.100812] Stack : 00000080 80255cfc 81c0700d 81f37a40 834c7000
00000000 00000002 834c7558
[ 2.109438] 00000002 804bbf3c 00000000 83501f78 834bb0b0 834df478
8194eae0 834c7000
[ 2.118058] 00000000 804bc020 ffffffed 83508780 00000000 00000004
834bb0b0 81f5b800
[ 2.126677] 808eb104 808eb104 81950000 804c48cc 00000003 81f5b800
81f5b800 00000000
[ 2.135297] 808eb104 81f5b800 808eb104 804bc6c0 834c7570 10008b01
81f5b800 81f5b8e0
[ 2.143925] ...
[ 2.146435] Call Trace:
[ 2.148943] [<804be000>] mdio_mux_read+0x2c/0xd4
[ 2.153697] [<804bbf3c>] __mdiobus_read+0x20/0xc4
[ 2.158533] [<804bc020>] mdiobus_read+0x40/0x6c
[ 2.163193] [<804c48cc>] b53_mdio_probe+0x38/0x16c
[ 2.168120] [<804bc6c0>] mdio_probe+0x34/0x7c
[ 2.172600] [<80437930>] really_probe.part.0+0xac/0x35c
[ 2.177976] [<80437c8c>] __driver_probe_device+0xac/0x164
[ 2.183531] [<80437d90>] driver_probe_device+0x4c/0x158
[ 2.188907] [<80438444>] __device_attach_driver+0xd0/0x15c
[ 2.194552] [<804353a0>] bus_for_each_drv+0x70/0xb0
[ 2.199569] [<804380f0>] __device_attach+0xc0/0x1d8
[ 2.204588] [<804367f4>] bus_probe_device+0x9c/0xb8
[ 2.209604] [<80436d58>] deferred_probe_work_func+0x94/0xd4
[ 2.215339] [<80058314>] process_one_work+0x290/0x4d0
[ 2.220536] [<800588ac>] worker_thread+0x358/0x614
[ 2.225464] [<80061064>] kthread+0x148/0x16c
[ 2.229854] [<80013848>] ret_from_kernel_thread+0x14/0x1c
[ 2.235413]
[ 2.236931] Code: 00a0a025 8e700004 00c09025 <8e040000> 0c1ba5d8
24840558 8e020010 8e06000c 8e65000c
[ 2.247011]
[ 2.248726] ---[ end trace 9e5942a13795eb30 ]---
[ 2.253490] Kernel panic - not syncing: Fatal exception
[ 2.258831] Rebooting in 1 seconds..

2. before dsa_register_switch():
[ 1.759901] bcm53xx 0.1:1e: failed to register switch: -19
[ 1.765837] b53-switch 10e00000.switch: MDIO mux bus init
[ 1.771412] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
[ 1.782683] bcm6368-enetsw 1000d800.ethernet: IRQ tx not found
[ 1.793149] bcm6368-enetsw 1000d800.ethernet: mtd mac 4c:60:de:86:52:12
[ 1.875791] bcm7038-wdt 1000005c.watchdog: Registered BCM7038 Watchdog
[ 1.893480] NET: Registered PF_INET6 protocol family
[ 1.922283] Segment Routing with IPv6
[ 1.926192] In-situ OAM (IOAM) with IPv6
[ 1.930392] NET: Registered PF_PACKET protocol family
[ 1.936526] 8021q: 802.1Q VLAN Support v1.8
[ 2.245288] bcm53xx 1.1:1e: failed to register switch: -19
[ 2.251210] b53-switch 10e00000.switch: MDIO mux bus init
[ 2.256761] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
*** Device hangs ***

3. before b53_switch_init():
[ 1.757728] bcm53xx 0.1:1e: failed to register switch: -19
[ 1.763689] b53-switch 10e00000.switch: MDIO mux bus init
[ 1.769780] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
[ 1.781130] bcm6368-enetsw 1000d800.ethernet: IRQ tx not found
[ 1.790996] bcm6368-enetsw 1000d800.ethernet: mtd mac 4c:60:de:86:52:12
[ 1.875775] bcm7038-wdt 1000005c.watchdog: Registered BCM7038 Watchdog
[ 1.893523] NET: Registered PF_INET6 protocol family
[ 1.921605] Segment Routing with IPv6
[ 1.925513] In-situ OAM (IOAM) with IPv6
[ 1.929695] NET: Registered PF_PACKET protocol family
[ 1.935809] 8021q: 802.1Q VLAN Support v1.8
[ 2.244702] bcm53xx 1.1:1e: failed to register switch: -19
[ 2.250653] b53-switch 10e00000.switch: MDIO mux bus init
[ 2.256751] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
*** Device hangs ***

I will be happy to do any more tests if needed.

Best regards,
Álvaro.

2023-03-17 14:29:35

by Vladimir Oltean

[permalink] [raw]
Subject: Re: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

On Fri, Mar 17, 2023 at 03:17:12PM +0100, ?lvaro Fern?ndez Rojas wrote:
> > The proposed solution is too radical for a problem that was not properly
> > characterized yet, so this patch set has my temporary NACK.
>
> Forgive me, but why do you consider this solution too radical?

Because it involves changing device tree bindings (stable ABI) in an
incompatible way.

> >
> > > But maybe Florian or Jonas can give some more details about the issue...
> >
> > I think you also have the tools necessary to investigate this further.
> > We need to know what resource belonging to the switch is it that the
> > MDIO mux needs. Where is the earliest place you can add the call to
> > b53_mmap_mdiomux_init() such that your board works reliably? Note that
> > b53_switch_register() indirectly calls b53_setup(). By placing this
> > function where you have, the entirety of b53_setup() has finished
> > execution, and we don't know exactly what is it from there that is
> > needed.
>
> In the following link you will find different bootlogs related to
> different scenarios all of them with the same result: any attempt of
> calling b53_mmap_mdiomux_init() earlier than b53_switch_register()
> will either result in a kernel panic or a device hang:
> https://gist.github.com/Noltari/b0bd6d5211160ac7bf349d998d21e7f7
>
> 1. before b53_switch_register():
>
> 2. before dsa_register_switch():
>
> 3. before b53_switch_init():

Did you read what I said?

| Note that b53_switch_register() indirectly calls b53_setup(). By placing
| this function where you have, the entirety of b53_setup() has finished
| execution, and we don't know exactly what is it from there that is
| needed.

Can you place the b53_mmap_mdiomux_init() in various places within
b53_setup() to restrict the search further?

2023-03-17 16:23:53

by Álvaro Fernández Rojas

[permalink] [raw]
Subject: Re: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

El vie, 17 mar 2023 a las 15:29, Vladimir Oltean (<[email protected]>) escribió:
>
> On Fri, Mar 17, 2023 at 03:17:12PM +0100, Álvaro Fernández Rojas wrote:
> > > The proposed solution is too radical for a problem that was not properly
> > > characterized yet, so this patch set has my temporary NACK.
> >
> > Forgive me, but why do you consider this solution too radical?
>
> Because it involves changing device tree bindings (stable ABI) in an
> incompatible way.
>
> > >
> > > > But maybe Florian or Jonas can give some more details about the issue...
> > >
> > > I think you also have the tools necessary to investigate this further.
> > > We need to know what resource belonging to the switch is it that the
> > > MDIO mux needs. Where is the earliest place you can add the call to
> > > b53_mmap_mdiomux_init() such that your board works reliably? Note that
> > > b53_switch_register() indirectly calls b53_setup(). By placing this
> > > function where you have, the entirety of b53_setup() has finished
> > > execution, and we don't know exactly what is it from there that is
> > > needed.
> >
> > In the following link you will find different bootlogs related to
> > different scenarios all of them with the same result: any attempt of
> > calling b53_mmap_mdiomux_init() earlier than b53_switch_register()
> > will either result in a kernel panic or a device hang:
> > https://gist.github.com/Noltari/b0bd6d5211160ac7bf349d998d21e7f7
> >
> > 1. before b53_switch_register():
> >
> > 2. before dsa_register_switch():
> >
> > 3. before b53_switch_init():
>
> Did you read what I said?

Yes, but I didn't get your point, sorry for that.

>
> | Note that b53_switch_register() indirectly calls b53_setup(). By placing
> | this function where you have, the entirety of b53_setup() has finished
> | execution, and we don't know exactly what is it from there that is
> | needed.
>
> Can you place the b53_mmap_mdiomux_init() in various places within
> b53_setup() to restrict the search further?

I tried and these are the results:
https://gist.github.com/Noltari/d5bdba66b8f2e392c9e4c2759661d862

All of them hang when dsa_tree_setup() is called for DSA tree 1
(external switch) without having completely setup DSA tree 0 (internal
switch):
[ 1.471345] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
[ 1.481099] bcm6368-enetsw 1000d800.ethernet: IRQ tx not found
[ 1.506752] bcm6368-enetsw 1000d800.ethernet: mtd mac 4c:60:de:86:52:12
[ 1.594365] bcm7038-wdt 1000005c.watchdog: Registered BCM7038 Watchdog
[ 1.612008] NET: Registered PF_INET6 protocol family
[ 1.645617] Segment Routing with IPv6
[ 1.649547] In-situ OAM (IOAM) with IPv6
[ 1.653948] NET: Registered PF_PACKET protocol family
[ 1.659984] 8021q: 802.1Q VLAN Support v1.8
[ 1.699193] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
[ 2.124257] bcm53xx 0.1:1e: found switch: BCM53125, rev 4
*** Device hang ***

I don't know if there's a way to defer the probe of DSA tree 1 (the
external switch) until DSA tree 0 (the internal switch) is completely
setup, because that would probably be the only alternative way of
fixing this.

--
Álvaro.

2023-03-17 16:27:29

by Andrew Lunn

[permalink] [raw]
Subject: Re: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

> > The proposed solution is too radical for a problem that was not properly
> > characterized yet, so this patch set has my temporary NACK.
>
> Forgive me, but why do you consider this solution too radical?

I have to agree with Vladimir here. The problem is not the driver, but
when the driver is instantiated. It seems radical to remove a driver
just because it loads at the wrong time. Ideally you want the driver
to figure out now is not a good time and return -EPROBE_DEFER, because
a resource it requires it not available.

Andrew

2023-03-17 16:31:09

by Álvaro Fernández Rojas

[permalink] [raw]
Subject: Re: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

El vie, 17 mar 2023 a las 17:27, Andrew Lunn (<[email protected]>) escribió:
>
> > > The proposed solution is too radical for a problem that was not properly
> > > characterized yet, so this patch set has my temporary NACK.
> >
> > Forgive me, but why do you consider this solution too radical?
>
> I have to agree with Vladimir here. The problem is not the driver, but
> when the driver is instantiated. It seems radical to remove a driver
> just because it loads at the wrong time. Ideally you want the driver
> to figure out now is not a good time and return -EPROBE_DEFER, because
> a resource it requires it not available.

Ok, I'm open to suggestions.
Any ideas on how exactly to figure out when it's a good time to probe
or return -EPROBE_DEFER instead?

>
> Andrew

--
Álvaro.

2023-03-17 16:37:53

by Andrew Lunn

[permalink] [raw]
Subject: Re: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

On Fri, Mar 17, 2023 at 05:30:38PM +0100, ?lvaro Fern?ndez Rojas wrote:
> El vie, 17 mar 2023 a las 17:27, Andrew Lunn (<[email protected]>) escribi?:
> >
> > > > The proposed solution is too radical for a problem that was not properly
> > > > characterized yet, so this patch set has my temporary NACK.
> > >
> > > Forgive me, but why do you consider this solution too radical?
> >
> > I have to agree with Vladimir here. The problem is not the driver, but
> > when the driver is instantiated. It seems radical to remove a driver
> > just because it loads at the wrong time. Ideally you want the driver
> > to figure out now is not a good time and return -EPROBE_DEFER, because
> > a resource it requires it not available.
>
> Ok, I'm open to suggestions.
> Any ideas on how exactly to figure out when it's a good time to probe
> or return -EPROBE_DEFER instead?

Vladimir already said:

> > > We need to know what resource belonging to the switch is it that the
> > > MDIO mux needs.

Please answer that question. Once we know what the resource is, we can
look at how to export it to the mux in a way that is safe.

Andrew

2023-03-17 16:41:40

by Florian Fainelli

[permalink] [raw]
Subject: Re: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

On 3/17/23 09:23, Álvaro Fernández Rojas wrote:
> El vie, 17 mar 2023 a las 15:29, Vladimir Oltean (<[email protected]>) escribió:
>>
>> On Fri, Mar 17, 2023 at 03:17:12PM +0100, Álvaro Fernández Rojas wrote:
>>>> The proposed solution is too radical for a problem that was not properly
>>>> characterized yet, so this patch set has my temporary NACK.
>>>
>>> Forgive me, but why do you consider this solution too radical?
>>
>> Because it involves changing device tree bindings (stable ABI) in an
>> incompatible way.
>>
>>>>
>>>>> But maybe Florian or Jonas can give some more details about the issue...
>>>>
>>>> I think you also have the tools necessary to investigate this further.
>>>> We need to know what resource belonging to the switch is it that the
>>>> MDIO mux needs. Where is the earliest place you can add the call to
>>>> b53_mmap_mdiomux_init() such that your board works reliably? Note that
>>>> b53_switch_register() indirectly calls b53_setup(). By placing this
>>>> function where you have, the entirety of b53_setup() has finished
>>>> execution, and we don't know exactly what is it from there that is
>>>> needed.
>>>
>>> In the following link you will find different bootlogs related to
>>> different scenarios all of them with the same result: any attempt of
>>> calling b53_mmap_mdiomux_init() earlier than b53_switch_register()
>>> will either result in a kernel panic or a device hang:
>>> https://gist.github.com/Noltari/b0bd6d5211160ac7bf349d998d21e7f7
>>>
>>> 1. before b53_switch_register():
>>>
>>> 2. before dsa_register_switch():
>>>
>>> 3. before b53_switch_init():
>>
>> Did you read what I said?
>
> Yes, but I didn't get your point, sorry for that.
>
>>
>> | Note that b53_switch_register() indirectly calls b53_setup(). By placing
>> | this function where you have, the entirety of b53_setup() has finished
>> | execution, and we don't know exactly what is it from there that is
>> | needed.
>>
>> Can you place the b53_mmap_mdiomux_init() in various places within
>> b53_setup() to restrict the search further?
>
> I tried and these are the results:
> https://gist.github.com/Noltari/d5bdba66b8f2e392c9e4c2759661d862
>
> All of them hang when dsa_tree_setup() is called for DSA tree 1
> (external switch) without having completely setup DSA tree 0 (internal
> switch):
> [ 1.471345] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
> [ 1.481099] bcm6368-enetsw 1000d800.ethernet: IRQ tx not found
> [ 1.506752] bcm6368-enetsw 1000d800.ethernet: mtd mac 4c:60:de:86:52:12
> [ 1.594365] bcm7038-wdt 1000005c.watchdog: Registered BCM7038 Watchdog
> [ 1.612008] NET: Registered PF_INET6 protocol family
> [ 1.645617] Segment Routing with IPv6
> [ 1.649547] In-situ OAM (IOAM) with IPv6
> [ 1.653948] NET: Registered PF_PACKET protocol family
> [ 1.659984] 8021q: 802.1Q VLAN Support v1.8
> [ 1.699193] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
> [ 2.124257] bcm53xx 0.1:1e: found switch: BCM53125, rev 4
> *** Device hang ***
>
> I don't know if there's a way to defer the probe of DSA tree 1 (the
> external switch) until DSA tree 0 (the internal switch) is completely
> setup, because that would probably be the only alternative way of
> fixing this.

Could you find out which part is hanging? It looks like there is a busy
waiting operation that we never complete?

DSA should be perfectly capable of dealing with disjoint trees being
cascaded to one another, as this is entirely within how the framework is
designed.

What I suspect might be happening is a "double programming" effect,
similar or identical to what was described in this commit:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b8c6cd1d316f3b01ae578d8e29179f6396c0eaa2

using the MDIO mux would properly isolate the pseudo PHYs of the switch
such that a given MDIO write does not end up programming *both* the
internal and external switches. It could also be a completely different
problem.
--
Florian


2023-03-17 16:46:03

by Álvaro Fernández Rojas

[permalink] [raw]
Subject: Re: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

El vie, 17 mar 2023 a las 17:41, Florian Fainelli
(<[email protected]>) escribió:
>
> On 3/17/23 09:23, Álvaro Fernández Rojas wrote:
> > El vie, 17 mar 2023 a las 15:29, Vladimir Oltean (<[email protected]>) escribió:
> >>
> >> On Fri, Mar 17, 2023 at 03:17:12PM +0100, Álvaro Fernández Rojas wrote:
> >>>> The proposed solution is too radical for a problem that was not properly
> >>>> characterized yet, so this patch set has my temporary NACK.
> >>>
> >>> Forgive me, but why do you consider this solution too radical?
> >>
> >> Because it involves changing device tree bindings (stable ABI) in an
> >> incompatible way.
> >>
> >>>>
> >>>>> But maybe Florian or Jonas can give some more details about the issue...
> >>>>
> >>>> I think you also have the tools necessary to investigate this further.
> >>>> We need to know what resource belonging to the switch is it that the
> >>>> MDIO mux needs. Where is the earliest place you can add the call to
> >>>> b53_mmap_mdiomux_init() such that your board works reliably? Note that
> >>>> b53_switch_register() indirectly calls b53_setup(). By placing this
> >>>> function where you have, the entirety of b53_setup() has finished
> >>>> execution, and we don't know exactly what is it from there that is
> >>>> needed.
> >>>
> >>> In the following link you will find different bootlogs related to
> >>> different scenarios all of them with the same result: any attempt of
> >>> calling b53_mmap_mdiomux_init() earlier than b53_switch_register()
> >>> will either result in a kernel panic or a device hang:
> >>> https://gist.github.com/Noltari/b0bd6d5211160ac7bf349d998d21e7f7
> >>>
> >>> 1. before b53_switch_register():
> >>>
> >>> 2. before dsa_register_switch():
> >>>
> >>> 3. before b53_switch_init():
> >>
> >> Did you read what I said?
> >
> > Yes, but I didn't get your point, sorry for that.
> >
> >>
> >> | Note that b53_switch_register() indirectly calls b53_setup(). By placing
> >> | this function where you have, the entirety of b53_setup() has finished
> >> | execution, and we don't know exactly what is it from there that is
> >> | needed.
> >>
> >> Can you place the b53_mmap_mdiomux_init() in various places within
> >> b53_setup() to restrict the search further?
> >
> > I tried and these are the results:
> > https://gist.github.com/Noltari/d5bdba66b8f2e392c9e4c2759661d862
> >
> > All of them hang when dsa_tree_setup() is called for DSA tree 1
> > (external switch) without having completely setup DSA tree 0 (internal
> > switch):
> > [ 1.471345] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
> > [ 1.481099] bcm6368-enetsw 1000d800.ethernet: IRQ tx not found
> > [ 1.506752] bcm6368-enetsw 1000d800.ethernet: mtd mac 4c:60:de:86:52:12
> > [ 1.594365] bcm7038-wdt 1000005c.watchdog: Registered BCM7038 Watchdog
> > [ 1.612008] NET: Registered PF_INET6 protocol family
> > [ 1.645617] Segment Routing with IPv6
> > [ 1.649547] In-situ OAM (IOAM) with IPv6
> > [ 1.653948] NET: Registered PF_PACKET protocol family
> > [ 1.659984] 8021q: 802.1Q VLAN Support v1.8
> > [ 1.699193] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
> > [ 2.124257] bcm53xx 0.1:1e: found switch: BCM53125, rev 4
> > *** Device hang ***
> >
> > I don't know if there's a way to defer the probe of DSA tree 1 (the
> > external switch) until DSA tree 0 (the internal switch) is completely
> > setup, because that would probably be the only alternative way of
> > fixing this.
>
> Could you find out which part is hanging? It looks like there is a busy
> waiting operation that we never complete?

I don't think so, but I will try to debug the exact issue and report back.

>
> DSA should be perfectly capable of dealing with disjoint trees being
> cascaded to one another, as this is entirely within how the framework is
> designed.
>
> What I suspect might be happening is a "double programming" effect,
> similar or identical to what was described in this commit:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b8c6cd1d316f3b01ae578d8e29179f6396c0eaa2

Thanks for the info, I will look into this.

>
> using the MDIO mux would properly isolate the pseudo PHYs of the switch
> such that a given MDIO write does not end up programming *both* the
> internal and external switches. It could also be a completely different
> problem.
> --
> Florian
>

2023-03-19 09:45:27

by Álvaro Fernández Rojas

[permalink] [raw]
Subject: Re: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

El vie, 17 mar 2023 a las 17:41, Florian Fainelli
(<[email protected]>) escribió:
>
> On 3/17/23 09:23, Álvaro Fernández Rojas wrote:
> > El vie, 17 mar 2023 a las 15:29, Vladimir Oltean (<[email protected]>) escribió:
> >>
> >> On Fri, Mar 17, 2023 at 03:17:12PM +0100, Álvaro Fernández Rojas wrote:
> >>>> The proposed solution is too radical for a problem that was not properly
> >>>> characterized yet, so this patch set has my temporary NACK.
> >>>
> >>> Forgive me, but why do you consider this solution too radical?
> >>
> >> Because it involves changing device tree bindings (stable ABI) in an
> >> incompatible way.
> >>
> >>>>
> >>>>> But maybe Florian or Jonas can give some more details about the issue...
> >>>>
> >>>> I think you also have the tools necessary to investigate this further.
> >>>> We need to know what resource belonging to the switch is it that the
> >>>> MDIO mux needs. Where is the earliest place you can add the call to
> >>>> b53_mmap_mdiomux_init() such that your board works reliably? Note that
> >>>> b53_switch_register() indirectly calls b53_setup(). By placing this
> >>>> function where you have, the entirety of b53_setup() has finished
> >>>> execution, and we don't know exactly what is it from there that is
> >>>> needed.
> >>>
> >>> In the following link you will find different bootlogs related to
> >>> different scenarios all of them with the same result: any attempt of
> >>> calling b53_mmap_mdiomux_init() earlier than b53_switch_register()
> >>> will either result in a kernel panic or a device hang:
> >>> https://gist.github.com/Noltari/b0bd6d5211160ac7bf349d998d21e7f7
> >>>
> >>> 1. before b53_switch_register():
> >>>
> >>> 2. before dsa_register_switch():
> >>>
> >>> 3. before b53_switch_init():
> >>
> >> Did you read what I said?
> >
> > Yes, but I didn't get your point, sorry for that.
> >
> >>
> >> | Note that b53_switch_register() indirectly calls b53_setup(). By placing
> >> | this function where you have, the entirety of b53_setup() has finished
> >> | execution, and we don't know exactly what is it from there that is
> >> | needed.
> >>
> >> Can you place the b53_mmap_mdiomux_init() in various places within
> >> b53_setup() to restrict the search further?
> >
> > I tried and these are the results:
> > https://gist.github.com/Noltari/d5bdba66b8f2e392c9e4c2759661d862
> >
> > All of them hang when dsa_tree_setup() is called for DSA tree 1
> > (external switch) without having completely setup DSA tree 0 (internal
> > switch):
> > [ 1.471345] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
> > [ 1.481099] bcm6368-enetsw 1000d800.ethernet: IRQ tx not found
> > [ 1.506752] bcm6368-enetsw 1000d800.ethernet: mtd mac 4c:60:de:86:52:12
> > [ 1.594365] bcm7038-wdt 1000005c.watchdog: Registered BCM7038 Watchdog
> > [ 1.612008] NET: Registered PF_INET6 protocol family
> > [ 1.645617] Segment Routing with IPv6
> > [ 1.649547] In-situ OAM (IOAM) with IPv6
> > [ 1.653948] NET: Registered PF_PACKET protocol family
> > [ 1.659984] 8021q: 802.1Q VLAN Support v1.8
> > [ 1.699193] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
> > [ 2.124257] bcm53xx 0.1:1e: found switch: BCM53125, rev 4
> > *** Device hang ***
> >
> > I don't know if there's a way to defer the probe of DSA tree 1 (the
> > external switch) until DSA tree 0 (the internal switch) is completely
> > setup, because that would probably be the only alternative way of
> > fixing this.
>
> Could you find out which part is hanging? It looks like there is a busy
> waiting operation that we never complete?

After many tests I was able to find the part that was hanging the device.
It turns out that if the MDIO bus controller is registered soon
enough, b53_phy_read16 will be called for the RGMII port on the
internal switch:
[ 4.042698] b53-switch 10e00000.switch: b53_phy_read16: ds=81fede80
phy_read16=00000000 addr=4 reg=2
It turns out that the device is hanging on the following line of
b53_phy_read16():
b53_read16(priv, B53_PORT_MII_PAGE(addr), reg * 2, &value);
Maybe it's not safe to access B53_PORT_MII_PAGE() on MMAP switches?

Only in one specific image in which I had a lot of debugging this
access didn't hang, but it just returned 0:
[ 5.129715] b53_mmap_write16: dev=83547680 page=0 reg=3c val=100
[ 5.135914] b53_mmap_write16: dev=83547680 page=0 reg=3c val=100 done!
[ 5.143721] b53-switch 10e00000.switch: b53_phy_read16: ds=83547580
phy_read16=00000000 addr=4 reg=2
[ 5.153204] b53-switch 10e00000.switch: b53_phy_read16: ds=83547580
phy_read16=00000000 addr=4 reg=2 val=0
[ 5.163171] b53-switch 10e00000.switch: b53_phy_read16: ds=83547580
phy_read16=00000000 addr=4 reg=3
[ 5.172560] b53-switch 10e00000.switch: b53_phy_read16: ds=83547580
phy_read16=00000000 addr=4 reg=3 val=0
[ 5.218764] b53-switch 10e00000.switch: Using legacy PHYLIB callbacks.
Please migrate to PHYLINK!

However, if I implement b53_mmap_phy_read16() and
b53_mmap_phy_write16() in MMAP it seems to solve the issue and the
device doesn't hang anymore:
[ 2.783407] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
[ 2.951877] b53-switch 10e00000.switch: b53_phy_read16: addr=4 reg=2
[ 2.958393] b53_mmap_phy_read16: dev=836f6580 phy_id=4 loc=2
[ 2.964367] b53_mmap_phy_read16: dev=836f6580 phy_id=4 loc=2 val=362
[ 2.970923] b53-switch 10e00000.switch: b53_phy_read16: addr=4 reg=3
[ 2.977420] b53_mmap_phy_read16: dev=836f6580 phy_id=4 loc=3
[ 2.983315] b53_mmap_phy_read16: dev=836f6580 phy_id=4 loc=3 val=5e80
[ 3.026253] b53-switch 10e00000.switch: Using legacy PHYLIB callbacks.
Please migrate to PHYLINK!
[ 3.072584] b53-switch 10e00000.switch: Configured port 8 for internal
[ 3.082850] DSA: tree 0 setup

However, what I did is just replicating mdio-mux-bcm6368 source code
in MMAP (for the internal PHY only):
static int b53_mmap_phy_read16(struct b53_device *dev, int phy_id, int
loc, u16 *val)
{
uint32_t reg;

b53_mmap_write32(dev, 0, REG_MDIOC, 0);

reg = REG_MDIOC_RD_MASK |
(phy_id << REG_MDIOC_PHYID_SHIFT) |
(loc << REG_MDIOC_REG_SHIFT);

b53_mmap_write32(dev, 0, REG_MDIOC, reg);
udelay(50);
b53_mmap_read16(dev, 0, REG_MDIOD, val);

return 0;
}

static int b53_mmap_phy_write16(struct b53_device *dev, int phy_id,
int loc, u16 val)
{
uint32_t reg;

b53_mmap_write32(dev, 0, REG_MDIOC, 0);

reg = REG_MDIOC_WR_MASK |
(phy_id << REG_MDIOC_PHYID_SHIFT) |
(loc << REG_MDIOC_REG_SHIFT) |
val;

b53_mmap_write32(dev, 0, REG_MDIOC, reg);
udelay(50);

return 0;
}

Is it safe to add those functions in MMAP or is there a way of forcing
the use of mdio-mux-bcm6368 for those PHY accesses?

>
> DSA should be perfectly capable of dealing with disjoint trees being
> cascaded to one another, as this is entirely within how the framework is
> designed.
>
> What I suspect might be happening is a "double programming" effect,
> similar or identical to what was described in this commit:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b8c6cd1d316f3b01ae578d8e29179f6396c0eaa2
>
> using the MDIO mux would properly isolate the pseudo PHYs of the switch
> such that a given MDIO write does not end up programming *both* the
> internal and external switches. It could also be a completely different
> problem.
> --
> Florian
>

--
Álvaro

2023-03-19 11:35:41

by Krzysztof Kozlowski

[permalink] [raw]
Subject: Re: [PATCH 1/3] dt-bindings: net: move bcm6368-mdio-mux bindings to b53

On 17/03/2023 12:34, Álvaro Fernández Rojas wrote:
> b53 MMAP devices have a MDIO Mux bus controller that must be registered after
> properly initializing the switch. If the MDIO Mux controller is registered
> from a separate driver and the device has an external switch present, it will
> cause a race condition which will hang the device.

Are you sure it is wrapped according to Linux coding style?
https://elixir.bootlin.com/linux/v5.18-rc4/source/Documentation/process/submitting-patches.rst#L586

Your rationale does not justify incompatible binding change, especially
that it could be probably solved by including the child in the parent.

Anyway, Linux driver boot order is not a reason to change bindings really.


Best regards,
Krzysztof


2023-03-19 11:37:01

by Krzysztof Kozlowski

[permalink] [raw]
Subject: Re: [PATCH 3/3] net: mdio: remove BCM6368 MDIO mux bus driver

On 17/03/2023 12:34, Álvaro Fernández Rojas wrote:
> This driver is now registered from DSA B53 MMAP switches.
>
> Signed-off-by: Álvaro Fernández Rojas <[email protected]>

(...)

> -
> -static int bcm6368_mdiomux_remove(struct platform_device *pdev)
> -{
> - struct bcm6368_mdiomux_desc *md = platform_get_drvdata(pdev);
> -
> - mdio_mux_uninit(md->mux_handle);
> - mdiobus_unregister(md->mii_bus);
> -
> - return 0;
> -}
> -
> -static const struct of_device_id bcm6368_mdiomux_ids[] = {
> - { .compatible = "brcm,bcm6368-mdio-mux", },

This is an ABI break.

Best regards,
Krzysztof


2023-03-20 10:27:28

by Jonas Gorski

[permalink] [raw]
Subject: Re: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

On Sun, 19 Mar 2023 at 10:45, Álvaro Fernández Rojas <[email protected]> wrote:
>
> El vie, 17 mar 2023 a las 17:41, Florian Fainelli
> (<[email protected]>) escribió:
> >
> > On 3/17/23 09:23, Álvaro Fernández Rojas wrote:
> > > El vie, 17 mar 2023 a las 15:29, Vladimir Oltean (<[email protected]>) escribió:
> > >>
> > >> On Fri, Mar 17, 2023 at 03:17:12PM +0100, Álvaro Fernández Rojas wrote:
> > >>>> The proposed solution is too radical for a problem that was not properly
> > >>>> characterized yet, so this patch set has my temporary NACK.
> > >>>
> > >>> Forgive me, but why do you consider this solution too radical?
> > >>
> > >> Because it involves changing device tree bindings (stable ABI) in an
> > >> incompatible way.
> > >>
> > >>>>
> > >>>>> But maybe Florian or Jonas can give some more details about the issue...
> > >>>>
> > >>>> I think you also have the tools necessary to investigate this further.
> > >>>> We need to know what resource belonging to the switch is it that the
> > >>>> MDIO mux needs. Where is the earliest place you can add the call to
> > >>>> b53_mmap_mdiomux_init() such that your board works reliably? Note that
> > >>>> b53_switch_register() indirectly calls b53_setup(). By placing this
> > >>>> function where you have, the entirety of b53_setup() has finished
> > >>>> execution, and we don't know exactly what is it from there that is
> > >>>> needed.
> > >>>
> > >>> In the following link you will find different bootlogs related to
> > >>> different scenarios all of them with the same result: any attempt of
> > >>> calling b53_mmap_mdiomux_init() earlier than b53_switch_register()
> > >>> will either result in a kernel panic or a device hang:
> > >>> https://gist.github.com/Noltari/b0bd6d5211160ac7bf349d998d21e7f7
> > >>>
> > >>> 1. before b53_switch_register():
> > >>>
> > >>> 2. before dsa_register_switch():
> > >>>
> > >>> 3. before b53_switch_init():
> > >>
> > >> Did you read what I said?
> > >
> > > Yes, but I didn't get your point, sorry for that.
> > >
> > >>
> > >> | Note that b53_switch_register() indirectly calls b53_setup(). By placing
> > >> | this function where you have, the entirety of b53_setup() has finished
> > >> | execution, and we don't know exactly what is it from there that is
> > >> | needed.
> > >>
> > >> Can you place the b53_mmap_mdiomux_init() in various places within
> > >> b53_setup() to restrict the search further?
> > >
> > > I tried and these are the results:
> > > https://gist.github.com/Noltari/d5bdba66b8f2e392c9e4c2759661d862
> > >
> > > All of them hang when dsa_tree_setup() is called for DSA tree 1
> > > (external switch) without having completely setup DSA tree 0 (internal
> > > switch):
> > > [ 1.471345] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
> > > [ 1.481099] bcm6368-enetsw 1000d800.ethernet: IRQ tx not found
> > > [ 1.506752] bcm6368-enetsw 1000d800.ethernet: mtd mac 4c:60:de:86:52:12
> > > [ 1.594365] bcm7038-wdt 1000005c.watchdog: Registered BCM7038 Watchdog
> > > [ 1.612008] NET: Registered PF_INET6 protocol family
> > > [ 1.645617] Segment Routing with IPv6
> > > [ 1.649547] In-situ OAM (IOAM) with IPv6
> > > [ 1.653948] NET: Registered PF_PACKET protocol family
> > > [ 1.659984] 8021q: 802.1Q VLAN Support v1.8
> > > [ 1.699193] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
> > > [ 2.124257] bcm53xx 0.1:1e: found switch: BCM53125, rev 4
> > > *** Device hang ***
> > >
> > > I don't know if there's a way to defer the probe of DSA tree 1 (the
> > > external switch) until DSA tree 0 (the internal switch) is completely
> > > setup, because that would probably be the only alternative way of
> > > fixing this.
> >
> > Could you find out which part is hanging? It looks like there is a busy
> > waiting operation that we never complete?
>
> After many tests I was able to find the part that was hanging the device.
> It turns out that if the MDIO bus controller is registered soon
> enough, b53_phy_read16 will be called for the RGMII port on the
> internal switch:
> [ 4.042698] b53-switch 10e00000.switch: b53_phy_read16: ds=81fede80
> phy_read16=00000000 addr=4 reg=2
> It turns out that the device is hanging on the following line of
> b53_phy_read16():
> b53_read16(priv, B53_PORT_MII_PAGE(addr), reg * 2, &value);
> Maybe it's not safe to access B53_PORT_MII_PAGE() on MMAP switches?

If you are following the example from 1/3, then I think I see what the
issue might be here:

You are labeling the port where the external switch is connected as
"extsw", which is neither "cpu" nor "dsa", so it is treated as a
normal/user port (which it shouldn't). If you change its label to
"dsa" (which AFAIU would be the correct one to denote a daisy chained
switch) it should not try to access port 4's MDIO registers (via the
slave mdio bus).

Can you check if that helps?

Regards
Jonas

2023-03-20 15:29:10

by Álvaro Fernández Rojas

[permalink] [raw]
Subject: Re: [PATCH 2/3] net: dsa: b53: mmap: register MDIO Mux bus controller

El lun, 20 mar 2023 a las 11:27, Jonas Gorski
(<[email protected]>) escribió:
>
> On Sun, 19 Mar 2023 at 10:45, Álvaro Fernández Rojas <[email protected]> wrote:
> >
> > El vie, 17 mar 2023 a las 17:41, Florian Fainelli
> > (<[email protected]>) escribió:
> > >
> > > On 3/17/23 09:23, Álvaro Fernández Rojas wrote:
> > > > El vie, 17 mar 2023 a las 15:29, Vladimir Oltean (<[email protected]>) escribió:
> > > >>
> > > >> On Fri, Mar 17, 2023 at 03:17:12PM +0100, Álvaro Fernández Rojas wrote:
> > > >>>> The proposed solution is too radical for a problem that was not properly
> > > >>>> characterized yet, so this patch set has my temporary NACK.
> > > >>>
> > > >>> Forgive me, but why do you consider this solution too radical?
> > > >>
> > > >> Because it involves changing device tree bindings (stable ABI) in an
> > > >> incompatible way.
> > > >>
> > > >>>>
> > > >>>>> But maybe Florian or Jonas can give some more details about the issue...
> > > >>>>
> > > >>>> I think you also have the tools necessary to investigate this further.
> > > >>>> We need to know what resource belonging to the switch is it that the
> > > >>>> MDIO mux needs. Where is the earliest place you can add the call to
> > > >>>> b53_mmap_mdiomux_init() such that your board works reliably? Note that
> > > >>>> b53_switch_register() indirectly calls b53_setup(). By placing this
> > > >>>> function where you have, the entirety of b53_setup() has finished
> > > >>>> execution, and we don't know exactly what is it from there that is
> > > >>>> needed.
> > > >>>
> > > >>> In the following link you will find different bootlogs related to
> > > >>> different scenarios all of them with the same result: any attempt of
> > > >>> calling b53_mmap_mdiomux_init() earlier than b53_switch_register()
> > > >>> will either result in a kernel panic or a device hang:
> > > >>> https://gist.github.com/Noltari/b0bd6d5211160ac7bf349d998d21e7f7
> > > >>>
> > > >>> 1. before b53_switch_register():
> > > >>>
> > > >>> 2. before dsa_register_switch():
> > > >>>
> > > >>> 3. before b53_switch_init():
> > > >>
> > > >> Did you read what I said?
> > > >
> > > > Yes, but I didn't get your point, sorry for that.
> > > >
> > > >>
> > > >> | Note that b53_switch_register() indirectly calls b53_setup(). By placing
> > > >> | this function where you have, the entirety of b53_setup() has finished
> > > >> | execution, and we don't know exactly what is it from there that is
> > > >> | needed.
> > > >>
> > > >> Can you place the b53_mmap_mdiomux_init() in various places within
> > > >> b53_setup() to restrict the search further?
> > > >
> > > > I tried and these are the results:
> > > > https://gist.github.com/Noltari/d5bdba66b8f2e392c9e4c2759661d862
> > > >
> > > > All of them hang when dsa_tree_setup() is called for DSA tree 1
> > > > (external switch) without having completely setup DSA tree 0 (internal
> > > > switch):
> > > > [ 1.471345] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
> > > > [ 1.481099] bcm6368-enetsw 1000d800.ethernet: IRQ tx not found
> > > > [ 1.506752] bcm6368-enetsw 1000d800.ethernet: mtd mac 4c:60:de:86:52:12
> > > > [ 1.594365] bcm7038-wdt 1000005c.watchdog: Registered BCM7038 Watchdog
> > > > [ 1.612008] NET: Registered PF_INET6 protocol family
> > > > [ 1.645617] Segment Routing with IPv6
> > > > [ 1.649547] In-situ OAM (IOAM) with IPv6
> > > > [ 1.653948] NET: Registered PF_PACKET protocol family
> > > > [ 1.659984] 8021q: 802.1Q VLAN Support v1.8
> > > > [ 1.699193] b53-switch 10e00000.switch: found switch: BCM63xx, rev 0
> > > > [ 2.124257] bcm53xx 0.1:1e: found switch: BCM53125, rev 4
> > > > *** Device hang ***
> > > >
> > > > I don't know if there's a way to defer the probe of DSA tree 1 (the
> > > > external switch) until DSA tree 0 (the internal switch) is completely
> > > > setup, because that would probably be the only alternative way of
> > > > fixing this.
> > >
> > > Could you find out which part is hanging? It looks like there is a busy
> > > waiting operation that we never complete?
> >
> > After many tests I was able to find the part that was hanging the device.
> > It turns out that if the MDIO bus controller is registered soon
> > enough, b53_phy_read16 will be called for the RGMII port on the
> > internal switch:
> > [ 4.042698] b53-switch 10e00000.switch: b53_phy_read16: ds=81fede80
> > phy_read16=00000000 addr=4 reg=2
> > It turns out that the device is hanging on the following line of
> > b53_phy_read16():
> > b53_read16(priv, B53_PORT_MII_PAGE(addr), reg * 2, &value);
> > Maybe it's not safe to access B53_PORT_MII_PAGE() on MMAP switches?
>
> If you are following the example from 1/3, then I think I see what the
> issue might be here:
>
> You are labeling the port where the external switch is connected as
> "extsw", which is neither "cpu" nor "dsa", so it is treated as a
> normal/user port (which it shouldn't). If you change its label to
> "dsa" (which AFAIU would be the correct one to denote a daisy chained
> switch) it should not try to access port 4's MDIO registers (via the
> slave mdio bus).

Correct me if I'm wrong, but I think that the configuration you're
suggesting would be for a different kind of switches layout.
In this case I'm using a disjoint tree setup since both switches are
using different tags incompatible with each other.

So the proper switch layout should be the following (for a Huawei
HG253s, which is a BCM6362 with a WAN port on the internal switch port
5 and the external switch BCM53124S connected to the internal switch
on port 4):
https://gist.github.com/Noltari/975374f908bb056ecf2544d289255b2e#file-b53-disjoint-switch-trees-hg253s-v2-dts

BTW, this is the log (as you can see b53_mmap_phy_read16() is used for
ports 4 & 5 at the beginning. After the switch initialization, the
mdio-mux bus controller is used):
https://gist.github.com/Noltari/975374f908bb056ecf2544d289255b2e#file-b53-disjoint-switch-trees-hg253s-v2-log

I think that we need this kind of layout because as we already
discussed on "net: dsa: tag_brcm: legacy: fix daisy-chained switches"
(https://patchwork.kernel.org/project/netdevbpf/patch/[email protected]/)
it's the only way of removing the incorrect VLAN tag added by the
internal BCM63xx switch when it receives a packet from the external
switch on port 4 (RGMII).
Otherwise, the double tag won't be processed correctly due to the
invalid VLAN tag and the packet won't be correctly forwarded from the
external switch to the internal switch.

But I may be wrong here since I don't have much experience with DSA...
That's why I decided to upstream all the patches that I sent lately,
to seek for help of the experts :)

>
> Can you check if that helps?
>
> Regards
> Jonas

Best regards,
Álvaro.