2024-05-07 10:29:37

by Maxime Chevallier

[permalink] [raw]
Subject: [PATCH net-next 0/2] Fix phy_link_topology initialization

Nathan and Heiner reported issues that occur when phylib and phy drivers
built as modules expect the phy_link_topology to be initialized, due to
wrong use of IS_REACHABLE.

This small fixup series addresses that by moving the initialization code
into net/core/dev.c, but at the same time implementing lazy
initialization to only allocate the topology upon the first PHY
insertion.

This needed some refactoring, namely pass the netdevice itself as a
parameter for phy_link_topology helpers.

Thanks Heiner for the help on untangling this, and Nathan for the
report.

Maxime Chevallier (2):
net: phy: phy_link_topology: Pass netdevice to phy_link_topo helpers
net: phy: phy_link_topology: Lazy-initialize the link topology

drivers/net/phy/phy_device.c | 25 +++++----------
drivers/net/phy/phy_link_topology.c | 44 +++++++++++---------------
include/linux/netdevice.h | 2 ++
include/linux/phy_link_topology.h | 40 +++++++++++++----------
include/linux/phy_link_topology_core.h | 23 +++-----------
net/core/dev.c | 38 ++++++++++++++++++----
net/ethtool/netlink.c | 2 +-
7 files changed, 88 insertions(+), 86 deletions(-)

--
2.44.0



2024-05-07 10:29:53

by Maxime Chevallier

[permalink] [raw]
Subject: [PATCH net-next 1/2] net: phy: phy_link_topology: Pass netdevice to phy_link_topo helpers

The phy link topology's main goal is to better track which PHYs are
connected to a given netdevice. Make so that the helpers take the
netdevice as a parameter directly.

Signed-off-by: Maxime Chevallier <[email protected]>
Fixes: 6916e461e793 ("net: phy: Introduce ethernet link topology representation")
Closes: https://lore.kernel.org/netdev/[email protected]/
Closes: https://lore.kernel.org/netdev/[email protected]/
---
drivers/net/phy/phy_device.c | 25 ++++++++-----------------
drivers/net/phy/phy_link_topology.c | 13 ++++++++++---
include/linux/phy_link_topology.h | 21 +++++++++++++--------
net/ethtool/netlink.c | 2 +-
4 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 616bd7ba46cb..111434201545 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -277,14 +277,6 @@ static void phy_mdio_device_remove(struct mdio_device *mdiodev)

static struct phy_driver genphy_driver;

-static struct phy_link_topology *phy_get_link_topology(struct phy_device *phydev)
-{
- if (phydev->attached_dev)
- return phydev->attached_dev->link_topo;
-
- return NULL;
-}
-
static LIST_HEAD(phy_fixup_list);
static DEFINE_MUTEX(phy_fixup_lock);

@@ -1389,10 +1381,10 @@ static DEVICE_ATTR_RO(phy_standalone);
int phy_sfp_connect_phy(void *upstream, struct phy_device *phy)
{
struct phy_device *phydev = upstream;
- struct phy_link_topology *topo = phy_get_link_topology(phydev);
+ struct net_device *dev = phydev->attached_dev;

- if (topo)
- return phy_link_topo_add_phy(topo, phy, PHY_UPSTREAM_PHY, phydev);
+ if (dev)
+ return phy_link_topo_add_phy(dev, phy, PHY_UPSTREAM_PHY, phydev);

return 0;
}
@@ -1411,10 +1403,10 @@ EXPORT_SYMBOL(phy_sfp_connect_phy);
void phy_sfp_disconnect_phy(void *upstream, struct phy_device *phy)
{
struct phy_device *phydev = upstream;
- struct phy_link_topology *topo = phy_get_link_topology(phydev);
+ struct net_device *dev = phydev->attached_dev;

- if (topo)
- phy_link_topo_del_phy(topo, phy);
+ if (dev)
+ phy_link_topo_del_phy(dev, phy);
}
EXPORT_SYMBOL(phy_sfp_disconnect_phy);

@@ -1561,8 +1553,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
if (phydev->sfp_bus_attached)
dev->sfp_bus = phydev->sfp_bus;

- err = phy_link_topo_add_phy(dev->link_topo, phydev,
- PHY_UPSTREAM_MAC, dev);
+ err = phy_link_topo_add_phy(dev, phydev, PHY_UPSTREAM_MAC, dev);
if (err)
goto error;
}
@@ -1992,7 +1983,7 @@ void phy_detach(struct phy_device *phydev)
if (dev) {
phydev->attached_dev->phydev = NULL;
phydev->attached_dev = NULL;
- phy_link_topo_del_phy(dev->link_topo, phydev);
+ phy_link_topo_del_phy(dev, phydev);
}
phydev->phylink = NULL;

diff --git a/drivers/net/phy/phy_link_topology.c b/drivers/net/phy/phy_link_topology.c
index 985941c5c558..0e36bd7c15dc 100644
--- a/drivers/net/phy/phy_link_topology.c
+++ b/drivers/net/phy/phy_link_topology.c
@@ -35,10 +35,11 @@ void phy_link_topo_destroy(struct phy_link_topology *topo)
kfree(topo);
}

-int phy_link_topo_add_phy(struct phy_link_topology *topo,
+int phy_link_topo_add_phy(struct net_device *dev,
struct phy_device *phy,
enum phy_upstream upt, void *upstream)
{
+ struct phy_link_topology *topo = dev->link_topo;
struct phy_device_node *pdn;
int ret;

@@ -90,10 +91,16 @@ int phy_link_topo_add_phy(struct phy_link_topology *topo,
}
EXPORT_SYMBOL_GPL(phy_link_topo_add_phy);

-void phy_link_topo_del_phy(struct phy_link_topology *topo,
+void phy_link_topo_del_phy(struct net_device *dev,
struct phy_device *phy)
{
- struct phy_device_node *pdn = xa_erase(&topo->phys, phy->phyindex);
+ struct phy_link_topology *topo = dev->link_topo;
+ struct phy_device_node *pdn;
+
+ if (!topo)
+ return;
+
+ pdn = xa_erase(&topo->phys, phy->phyindex);

/* We delete the PHY from the topology, however we don't re-set the
* phy->phyindex field. If the PHY isn't gone, we can re-assign it the
diff --git a/include/linux/phy_link_topology.h b/include/linux/phy_link_topology.h
index 6b79feb607e7..166a01710aa2 100644
--- a/include/linux/phy_link_topology.h
+++ b/include/linux/phy_link_topology.h
@@ -12,11 +12,11 @@
#define __PHY_LINK_TOPOLOGY_H

#include <linux/ethtool.h>
+#include <linux/netdevice.h>
#include <linux/phy_link_topology_core.h>

struct xarray;
struct phy_device;
-struct net_device;
struct sfp_bus;

struct phy_device_node {
@@ -37,11 +37,16 @@ struct phy_link_topology {
u32 next_phy_index;
};

-static inline struct phy_device *
-phy_link_topo_get_phy(struct phy_link_topology *topo, u32 phyindex)
+static inline struct phy_device
+*phy_link_topo_get_phy(struct net_device *dev, u32 phyindex)
{
- struct phy_device_node *pdn = xa_load(&topo->phys, phyindex);
+ struct phy_link_topology *topo = dev->link_topo;
+ struct phy_device_node *pdn;

+ if (!topo)
+ return NULL;
+
+ pdn = xa_load(&topo->phys, phyindex);
if (pdn)
return pdn->phy;

@@ -49,21 +54,21 @@ phy_link_topo_get_phy(struct phy_link_topology *topo, u32 phyindex)
}

#if IS_REACHABLE(CONFIG_PHYLIB)
-int phy_link_topo_add_phy(struct phy_link_topology *topo,
+int phy_link_topo_add_phy(struct net_device *dev,
struct phy_device *phy,
enum phy_upstream upt, void *upstream);

-void phy_link_topo_del_phy(struct phy_link_topology *lt, struct phy_device *phy);
+void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy);

#else
-static inline int phy_link_topo_add_phy(struct phy_link_topology *topo,
+static inline int phy_link_topo_add_phy(struct net_device *dev,
struct phy_device *phy,
enum phy_upstream upt, void *upstream)
{
return 0;
}

-static inline void phy_link_topo_del_phy(struct phy_link_topology *topo,
+static inline void phy_link_topo_del_phy(struct net_device *dev,
struct phy_device *phy)
{
}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 563e94e0cbd8..f5b4adf324bc 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -170,7 +170,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
struct nlattr *phy_id;

phy_id = tb[ETHTOOL_A_HEADER_PHY_INDEX];
- phydev = phy_link_topo_get_phy(dev->link_topo,
+ phydev = phy_link_topo_get_phy(dev,
nla_get_u32(phy_id));
if (!phydev) {
NL_SET_BAD_ATTR(extack, phy_id);
--
2.44.0


2024-05-07 10:56:58

by Maxime Chevallier

[permalink] [raw]
Subject: [PATCH net-next 2/2] net: phy: phy_link_topology: Lazy-initialize the link topology

Having the net_device's init path for the link_topology depend on
IS_REACHABLE(PHYLIB)-protected helpers triggers errors when modules are being
built with phylib as a module as-well, as they expect netdev->link_topo
to be initialized.

Move the link_topo initialization at the first PHY insertion, which will
both improve the memory usage, and make the behaviour more predicatble
and robust.

Signed-off-by: Maxime Chevallier <[email protected]>
Fixes: 6916e461e793 ("net: phy: Introduce ethernet link topology representation")
Closes: https://lore.kernel.org/netdev/[email protected]/
Closes: https://lore.kernel.org/netdev/[email protected]/
---
drivers/net/phy/phy_link_topology.c | 31 ++++++---------------
include/linux/netdevice.h | 2 ++
include/linux/phy_link_topology.h | 23 ++++++++--------
include/linux/phy_link_topology_core.h | 23 +++-------------
net/core/dev.c | 38 ++++++++++++++++++++++----
5 files changed, 58 insertions(+), 59 deletions(-)

diff --git a/drivers/net/phy/phy_link_topology.c b/drivers/net/phy/phy_link_topology.c
index 0e36bd7c15dc..b1aba9313e73 100644
--- a/drivers/net/phy/phy_link_topology.c
+++ b/drivers/net/phy/phy_link_topology.c
@@ -12,29 +12,6 @@
#include <linux/rtnetlink.h>
#include <linux/xarray.h>

-struct phy_link_topology *phy_link_topo_create(struct net_device *dev)
-{
- struct phy_link_topology *topo;
-
- topo = kzalloc(sizeof(*topo), GFP_KERNEL);
- if (!topo)
- return ERR_PTR(-ENOMEM);
-
- xa_init_flags(&topo->phys, XA_FLAGS_ALLOC1);
- topo->next_phy_index = 1;
-
- return topo;
-}
-
-void phy_link_topo_destroy(struct phy_link_topology *topo)
-{
- if (!topo)
- return;
-
- xa_destroy(&topo->phys);
- kfree(topo);
-}
-
int phy_link_topo_add_phy(struct net_device *dev,
struct phy_device *phy,
enum phy_upstream upt, void *upstream)
@@ -43,6 +20,14 @@ int phy_link_topo_add_phy(struct net_device *dev,
struct phy_device_node *pdn;
int ret;

+ if (!topo) {
+ ret = netdev_alloc_phy_link_topology(dev);
+ if (ret)
+ return ret;
+
+ topo = dev->link_topo;
+ }
+
pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
if (!pdn)
return -ENOMEM;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cf261fb89d73..25a0a77cfadc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4569,6 +4569,8 @@ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
const unsigned char *));
void __hw_addr_init(struct netdev_hw_addr_list *list);

+int netdev_alloc_phy_link_topology(struct net_device *dev);
+
/* Functions used for device addresses handling */
void dev_addr_mod(struct net_device *dev, unsigned int offset,
const void *addr, size_t len);
diff --git a/include/linux/phy_link_topology.h b/include/linux/phy_link_topology.h
index 166a01710aa2..3501f9a9e932 100644
--- a/include/linux/phy_link_topology.h
+++ b/include/linux/phy_link_topology.h
@@ -32,10 +32,12 @@ struct phy_device_node {
struct phy_device *phy;
};

-struct phy_link_topology {
- struct xarray phys;
- u32 next_phy_index;
-};
+#if IS_ENABLED(CONFIG_PHYLIB)
+int phy_link_topo_add_phy(struct net_device *dev,
+ struct phy_device *phy,
+ enum phy_upstream upt, void *upstream);
+
+void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy);

static inline struct phy_device
*phy_link_topo_get_phy(struct net_device *dev, u32 phyindex)
@@ -53,13 +55,6 @@ static inline struct phy_device
return NULL;
}

-#if IS_REACHABLE(CONFIG_PHYLIB)
-int phy_link_topo_add_phy(struct net_device *dev,
- struct phy_device *phy,
- enum phy_upstream upt, void *upstream);
-
-void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy);
-
#else
static inline int phy_link_topo_add_phy(struct net_device *dev,
struct phy_device *phy,
@@ -72,6 +67,12 @@ static inline void phy_link_topo_del_phy(struct net_device *dev,
struct phy_device *phy)
{
}
+
+static inline struct phy_device *
+phy_link_topo_get_phy(struct net_device *dev, u32 phyindex)
+{
+ return NULL;
+}
#endif

#endif /* __PHY_LINK_TOPOLOGY_H */
diff --git a/include/linux/phy_link_topology_core.h b/include/linux/phy_link_topology_core.h
index 0a6479055745..f9c0520806fb 100644
--- a/include/linux/phy_link_topology_core.h
+++ b/include/linux/phy_link_topology_core.h
@@ -2,24 +2,9 @@
#ifndef __PHY_LINK_TOPOLOGY_CORE_H
#define __PHY_LINK_TOPOLOGY_CORE_H

-struct phy_link_topology;
-
-#if IS_REACHABLE(CONFIG_PHYLIB)
-
-struct phy_link_topology *phy_link_topo_create(struct net_device *dev);
-void phy_link_topo_destroy(struct phy_link_topology *topo);
-
-#else
-
-static inline struct phy_link_topology *phy_link_topo_create(struct net_device *dev)
-{
- return NULL;
-}
-
-static inline void phy_link_topo_destroy(struct phy_link_topology *topo)
-{
-}
-
-#endif
+struct phy_link_topology {
+ struct xarray phys;
+ u32 next_phy_index;
+};

#endif /* __PHY_LINK_TOPOLOGY_CORE_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index d2ce91a334c1..1b4ffc273a04 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10256,6 +10256,35 @@ static void netdev_do_free_pcpu_stats(struct net_device *dev)
}
}

+int netdev_alloc_phy_link_topology(struct net_device *dev)
+{
+ struct phy_link_topology *topo;
+
+ topo = kzalloc(sizeof(*topo), GFP_KERNEL);
+ if (!topo)
+ return -ENOMEM;
+
+ xa_init_flags(&topo->phys, XA_FLAGS_ALLOC1);
+ topo->next_phy_index = 1;
+
+ dev->link_topo = topo;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_alloc_phy_link_topology);
+
+static void netdev_free_phy_link_topology(struct net_device *dev)
+{
+ struct phy_link_topology *topo = dev->link_topo;
+
+ if (!topo)
+ return;
+
+ xa_destroy(&topo->phys);
+ kfree(topo);
+ dev->link_topo = NULL;
+}
+
/**
* register_netdevice() - register a network device
* @dev: device to register
@@ -10998,11 +11027,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
#ifdef CONFIG_NET_SCHED
hash_init(dev->qdisc_hash);
#endif
- dev->link_topo = phy_link_topo_create(dev);
- if (IS_ERR(dev->link_topo)) {
- dev->link_topo = NULL;
- goto free_all;
- }

dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
@@ -11092,7 +11116,9 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->xdp_bulkq);
dev->xdp_bulkq = NULL;

- phy_link_topo_destroy(dev->link_topo);
+#if IS_ENABLED(CONFIG_PHYLIB)
+ netdev_free_phy_link_topology(dev);
+#endif

/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED ||
--
2.44.0


2024-05-07 23:30:11

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH net-next 2/2] net: phy: phy_link_topology: Lazy-initialize the link topology

Hi Maxime,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url: https://github.com/intel-lab-lkp/linux/commits/Maxime-Chevallier/net-phy-phy_link_topology-Pass-netdevice-to-phy_link_topo-helpers/20240507-183130
base: net-next/main
patch link: https://lore.kernel.org/r/20240507102822.2023826-3-maxime.chevallier%40bootlin.com
patch subject: [PATCH net-next 2/2] net: phy: phy_link_topology: Lazy-initialize the link topology
config: parisc-defconfig (https://download.01.org/0day-ci/archive/20240508/[email protected]/config)
compiler: hppa-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240508/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All warnings (new ones prefixed by >>):

>> net/core/dev.c:10276:13: warning: 'netdev_free_phy_link_topology' defined but not used [-Wunused-function]
10276 | static void netdev_free_phy_link_topology(struct net_device *dev)
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~


vim +/netdev_free_phy_link_topology +10276 net/core/dev.c

10275
10276 static void netdev_free_phy_link_topology(struct net_device *dev)
10277 {
10278 struct phy_link_topology *topo = dev->link_topo;
10279
10280 if (!topo)
10281 return;
10282
10283 xa_destroy(&topo->phys);
10284 kfree(topo);
10285 dev->link_topo = NULL;
10286 }
10287

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2024-05-08 05:45:33

by Heiner Kallweit

[permalink] [raw]
Subject: Re: [PATCH net-next 1/2] net: phy: phy_link_topology: Pass netdevice to phy_link_topo helpers

On 07.05.2024 12:28, Maxime Chevallier wrote:
> The phy link topology's main goal is to better track which PHYs are
> connected to a given netdevice. Make so that the helpers take the
> netdevice as a parameter directly.
>
The commit message should explain what the issue is that you're fixing,
and how this patch fixes it.

> Signed-off-by: Maxime Chevallier <[email protected]>
> Fixes: 6916e461e793 ("net: phy: Introduce ethernet link topology representation")
> Closes: https://lore.kernel.org/netdev/[email protected]/
> Closes: https://lore.kernel.org/netdev/[email protected]/
> ---
> drivers/net/phy/phy_device.c | 25 ++++++++-----------------
> drivers/net/phy/phy_link_topology.c | 13 ++++++++++---
> include/linux/phy_link_topology.h | 21 +++++++++++++--------
> net/ethtool/netlink.c | 2 +-
> 4 files changed, 32 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
> index 616bd7ba46cb..111434201545 100644
> --- a/drivers/net/phy/phy_device.c
> +++ b/drivers/net/phy/phy_device.c
> @@ -277,14 +277,6 @@ static void phy_mdio_device_remove(struct mdio_device *mdiodev)
>
> static struct phy_driver genphy_driver;
>
> -static struct phy_link_topology *phy_get_link_topology(struct phy_device *phydev)
> -{
> - if (phydev->attached_dev)
> - return phydev->attached_dev->link_topo;
> -
> - return NULL;
> -}
> -
> static LIST_HEAD(phy_fixup_list);
> static DEFINE_MUTEX(phy_fixup_lock);
>
> @@ -1389,10 +1381,10 @@ static DEVICE_ATTR_RO(phy_standalone);
> int phy_sfp_connect_phy(void *upstream, struct phy_device *phy)
> {
> struct phy_device *phydev = upstream;
> - struct phy_link_topology *topo = phy_get_link_topology(phydev);
> + struct net_device *dev = phydev->attached_dev;
>
> - if (topo)
> - return phy_link_topo_add_phy(topo, phy, PHY_UPSTREAM_PHY, phydev);
> + if (dev)
> + return phy_link_topo_add_phy(dev, phy, PHY_UPSTREAM_PHY, phydev);
>
> return 0;
> }
> @@ -1411,10 +1403,10 @@ EXPORT_SYMBOL(phy_sfp_connect_phy);
> void phy_sfp_disconnect_phy(void *upstream, struct phy_device *phy)
> {
> struct phy_device *phydev = upstream;
> - struct phy_link_topology *topo = phy_get_link_topology(phydev);
> + struct net_device *dev = phydev->attached_dev;
>
> - if (topo)
> - phy_link_topo_del_phy(topo, phy);
> + if (dev)
> + phy_link_topo_del_phy(dev, phy);
> }
> EXPORT_SYMBOL(phy_sfp_disconnect_phy);
>
> @@ -1561,8 +1553,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
> if (phydev->sfp_bus_attached)
> dev->sfp_bus = phydev->sfp_bus;
>
> - err = phy_link_topo_add_phy(dev->link_topo, phydev,
> - PHY_UPSTREAM_MAC, dev);
> + err = phy_link_topo_add_phy(dev, phydev, PHY_UPSTREAM_MAC, dev);
> if (err)
> goto error;
> }
> @@ -1992,7 +1983,7 @@ void phy_detach(struct phy_device *phydev)
> if (dev) {
> phydev->attached_dev->phydev = NULL;
> phydev->attached_dev = NULL;
> - phy_link_topo_del_phy(dev->link_topo, phydev);
> + phy_link_topo_del_phy(dev, phydev);
> }
> phydev->phylink = NULL;
>
> diff --git a/drivers/net/phy/phy_link_topology.c b/drivers/net/phy/phy_link_topology.c
> index 985941c5c558..0e36bd7c15dc 100644
> --- a/drivers/net/phy/phy_link_topology.c
> +++ b/drivers/net/phy/phy_link_topology.c
> @@ -35,10 +35,11 @@ void phy_link_topo_destroy(struct phy_link_topology *topo)
> kfree(topo);
> }
>
> -int phy_link_topo_add_phy(struct phy_link_topology *topo,
> +int phy_link_topo_add_phy(struct net_device *dev,
> struct phy_device *phy,
> enum phy_upstream upt, void *upstream)
> {
> + struct phy_link_topology *topo = dev->link_topo;
> struct phy_device_node *pdn;
> int ret;
>
> @@ -90,10 +91,16 @@ int phy_link_topo_add_phy(struct phy_link_topology *topo,
> }
> EXPORT_SYMBOL_GPL(phy_link_topo_add_phy);
>
> -void phy_link_topo_del_phy(struct phy_link_topology *topo,
> +void phy_link_topo_del_phy(struct net_device *dev,
> struct phy_device *phy)
> {
> - struct phy_device_node *pdn = xa_erase(&topo->phys, phy->phyindex);
> + struct phy_link_topology *topo = dev->link_topo;
> + struct phy_device_node *pdn;
> +
> + if (!topo)
> + return;
> +
> + pdn = xa_erase(&topo->phys, phy->phyindex);
>
> /* We delete the PHY from the topology, however we don't re-set the
> * phy->phyindex field. If the PHY isn't gone, we can re-assign it the
> diff --git a/include/linux/phy_link_topology.h b/include/linux/phy_link_topology.h
> index 6b79feb607e7..166a01710aa2 100644
> --- a/include/linux/phy_link_topology.h
> +++ b/include/linux/phy_link_topology.h
> @@ -12,11 +12,11 @@
> #define __PHY_LINK_TOPOLOGY_H
>
> #include <linux/ethtool.h>
> +#include <linux/netdevice.h>
> #include <linux/phy_link_topology_core.h>
>
> struct xarray;
> struct phy_device;
> -struct net_device;
> struct sfp_bus;
>
> struct phy_device_node {
> @@ -37,11 +37,16 @@ struct phy_link_topology {
> u32 next_phy_index;
> };
>
> -static inline struct phy_device *
> -phy_link_topo_get_phy(struct phy_link_topology *topo, u32 phyindex)
> +static inline struct phy_device
> +*phy_link_topo_get_phy(struct net_device *dev, u32 phyindex)
> {
> - struct phy_device_node *pdn = xa_load(&topo->phys, phyindex);
> + struct phy_link_topology *topo = dev->link_topo;
> + struct phy_device_node *pdn;
>
> + if (!topo)
> + return NULL;
> +
> + pdn = xa_load(&topo->phys, phyindex);
> if (pdn)
> return pdn->phy;
>
> @@ -49,21 +54,21 @@ phy_link_topo_get_phy(struct phy_link_topology *topo, u32 phyindex)
> }
>
> #if IS_REACHABLE(CONFIG_PHYLIB)
> -int phy_link_topo_add_phy(struct phy_link_topology *topo,
> +int phy_link_topo_add_phy(struct net_device *dev,
> struct phy_device *phy,
> enum phy_upstream upt, void *upstream);
>
> -void phy_link_topo_del_phy(struct phy_link_topology *lt, struct phy_device *phy);
> +void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy);
>
> #else
> -static inline int phy_link_topo_add_phy(struct phy_link_topology *topo,
> +static inline int phy_link_topo_add_phy(struct net_device *dev,
> struct phy_device *phy,
> enum phy_upstream upt, void *upstream)
> {
> return 0;
> }
>
> -static inline void phy_link_topo_del_phy(struct phy_link_topology *topo,
> +static inline void phy_link_topo_del_phy(struct net_device *dev,
> struct phy_device *phy)
> {
> }
> diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
> index 563e94e0cbd8..f5b4adf324bc 100644
> --- a/net/ethtool/netlink.c
> +++ b/net/ethtool/netlink.c
> @@ -170,7 +170,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
> struct nlattr *phy_id;
>
> phy_id = tb[ETHTOOL_A_HEADER_PHY_INDEX];
> - phydev = phy_link_topo_get_phy(dev->link_topo,
> + phydev = phy_link_topo_get_phy(dev,
> nla_get_u32(phy_id));
> if (!phydev) {
> NL_SET_BAD_ATTR(extack, phy_id);



2024-05-08 05:46:02

by Heiner Kallweit

[permalink] [raw]
Subject: Re: [PATCH net-next 2/2] net: phy: phy_link_topology: Lazy-initialize the link topology

On 07.05.2024 12:28, Maxime Chevallier wrote:
> Having the net_device's init path for the link_topology depend on
> IS_REACHABLE(PHYLIB)-protected helpers triggers errors when modules are being
> built with phylib as a module as-well, as they expect netdev->link_topo
> to be initialized.
>
> Move the link_topo initialization at the first PHY insertion, which will
> both improve the memory usage, and make the behaviour more predicatble
> and robust.
>
> Signed-off-by: Maxime Chevallier <[email protected]>
> Fixes: 6916e461e793 ("net: phy: Introduce ethernet link topology representation")
> Closes: https://lore.kernel.org/netdev/[email protected]/
> Closes: https://lore.kernel.org/netdev/[email protected]/
> ---
> drivers/net/phy/phy_link_topology.c | 31 ++++++---------------
> include/linux/netdevice.h | 2 ++
> include/linux/phy_link_topology.h | 23 ++++++++--------
> include/linux/phy_link_topology_core.h | 23 +++-------------
> net/core/dev.c | 38 ++++++++++++++++++++++----
> 5 files changed, 58 insertions(+), 59 deletions(-)
>
> diff --git a/drivers/net/phy/phy_link_topology.c b/drivers/net/phy/phy_link_topology.c
> index 0e36bd7c15dc..b1aba9313e73 100644
> --- a/drivers/net/phy/phy_link_topology.c
> +++ b/drivers/net/phy/phy_link_topology.c
> @@ -12,29 +12,6 @@
> #include <linux/rtnetlink.h>
> #include <linux/xarray.h>
>
> -struct phy_link_topology *phy_link_topo_create(struct net_device *dev)
> -{
> - struct phy_link_topology *topo;
> -
> - topo = kzalloc(sizeof(*topo), GFP_KERNEL);
> - if (!topo)
> - return ERR_PTR(-ENOMEM);
> -
> - xa_init_flags(&topo->phys, XA_FLAGS_ALLOC1);
> - topo->next_phy_index = 1;
> -
> - return topo;
> -}
> -
> -void phy_link_topo_destroy(struct phy_link_topology *topo)
> -{
> - if (!topo)
> - return;
> -
> - xa_destroy(&topo->phys);
> - kfree(topo);
> -}
> -
> int phy_link_topo_add_phy(struct net_device *dev,
> struct phy_device *phy,
> enum phy_upstream upt, void *upstream)
> @@ -43,6 +20,14 @@ int phy_link_topo_add_phy(struct net_device *dev,
> struct phy_device_node *pdn;
> int ret;
>
> + if (!topo) {
> + ret = netdev_alloc_phy_link_topology(dev);

This function is implemented in net core, but used only here.
So move the implementation here?

> + if (ret)
> + return ret;
> +
> + topo = dev->link_topo;
> + }
> +
> pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
> if (!pdn)
> return -ENOMEM;
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index cf261fb89d73..25a0a77cfadc 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -4569,6 +4569,8 @@ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
> const unsigned char *));
> void __hw_addr_init(struct netdev_hw_addr_list *list);
>
> +int netdev_alloc_phy_link_topology(struct net_device *dev);
> +
> /* Functions used for device addresses handling */
> void dev_addr_mod(struct net_device *dev, unsigned int offset,
> const void *addr, size_t len);
> diff --git a/include/linux/phy_link_topology.h b/include/linux/phy_link_topology.h
> index 166a01710aa2..3501f9a9e932 100644
> --- a/include/linux/phy_link_topology.h
> +++ b/include/linux/phy_link_topology.h
> @@ -32,10 +32,12 @@ struct phy_device_node {
> struct phy_device *phy;
> };
>
> -struct phy_link_topology {
> - struct xarray phys;
> - u32 next_phy_index;
> -};
> +#if IS_ENABLED(CONFIG_PHYLIB)
> +int phy_link_topo_add_phy(struct net_device *dev,
> + struct phy_device *phy,
> + enum phy_upstream upt, void *upstream);
> +
> +void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy);
>
> static inline struct phy_device
> *phy_link_topo_get_phy(struct net_device *dev, u32 phyindex)
> @@ -53,13 +55,6 @@ static inline struct phy_device
> return NULL;
> }
>
> -#if IS_REACHABLE(CONFIG_PHYLIB)
> -int phy_link_topo_add_phy(struct net_device *dev,
> - struct phy_device *phy,
> - enum phy_upstream upt, void *upstream);
> -
> -void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy);
> -
> #else
> static inline int phy_link_topo_add_phy(struct net_device *dev,
> struct phy_device *phy,
> @@ -72,6 +67,12 @@ static inline void phy_link_topo_del_phy(struct net_device *dev,
> struct phy_device *phy)
> {
> }
> +
> +static inline struct phy_device *
> +phy_link_topo_get_phy(struct net_device *dev, u32 phyindex)
> +{
> + return NULL;
> +}
> #endif
>
> #endif /* __PHY_LINK_TOPOLOGY_H */
> diff --git a/include/linux/phy_link_topology_core.h b/include/linux/phy_link_topology_core.h
> index 0a6479055745..f9c0520806fb 100644
> --- a/include/linux/phy_link_topology_core.h
> +++ b/include/linux/phy_link_topology_core.h
> @@ -2,24 +2,9 @@
> #ifndef __PHY_LINK_TOPOLOGY_CORE_H
> #define __PHY_LINK_TOPOLOGY_CORE_H
>
> -struct phy_link_topology;
> -
> -#if IS_REACHABLE(CONFIG_PHYLIB)
> -
> -struct phy_link_topology *phy_link_topo_create(struct net_device *dev);
> -void phy_link_topo_destroy(struct phy_link_topology *topo);
> -
> -#else
> -
> -static inline struct phy_link_topology *phy_link_topo_create(struct net_device *dev)
> -{
> - return NULL;
> -}
> -
> -static inline void phy_link_topo_destroy(struct phy_link_topology *topo)
> -{
> -}
> -
> -#endif
> +struct phy_link_topology {
> + struct xarray phys;
> + u32 next_phy_index;
> +};
>
This is all which is left in this header. As this header is public anyway,
better move this definition to phy_link_topology.h?

> #endif /* __PHY_LINK_TOPOLOGY_CORE_H */
> diff --git a/net/core/dev.c b/net/core/dev.c
> index d2ce91a334c1..1b4ffc273a04 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -10256,6 +10256,35 @@ static void netdev_do_free_pcpu_stats(struct net_device *dev)
> }
> }
>
> +int netdev_alloc_phy_link_topology(struct net_device *dev)
> +{
> + struct phy_link_topology *topo;
> +
> + topo = kzalloc(sizeof(*topo), GFP_KERNEL);
> + if (!topo)
> + return -ENOMEM;
> +
> + xa_init_flags(&topo->phys, XA_FLAGS_ALLOC1);
> + topo->next_phy_index = 1;
> +
> + dev->link_topo = topo;
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(netdev_alloc_phy_link_topology);
> +
> +static void netdev_free_phy_link_topology(struct net_device *dev)
> +{
> + struct phy_link_topology *topo = dev->link_topo;
> +
> + if (!topo)
> + return;
> +
> + xa_destroy(&topo->phys);
> + kfree(topo);
> + dev->link_topo = NULL;

Give the compiler a chance to remove this function if
CONFIG_PHYLIB isn't enabled.

if (IS_ENABLED(CONFIG_PHYLIB) && topo) {
xa_destroy(&topo->phys);
kfree(topo);
dev->link_topo = NULL;
}

> +}
> +
> /**
> * register_netdevice() - register a network device
> * @dev: device to register
> @@ -10998,11 +11027,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
> #ifdef CONFIG_NET_SCHED
> hash_init(dev->qdisc_hash);
> #endif
> - dev->link_topo = phy_link_topo_create(dev);
> - if (IS_ERR(dev->link_topo)) {
> - dev->link_topo = NULL;
> - goto free_all;
> - }
>
> dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
> setup(dev);
> @@ -11092,7 +11116,9 @@ void free_netdev(struct net_device *dev)
> free_percpu(dev->xdp_bulkq);
> dev->xdp_bulkq = NULL;
>
> - phy_link_topo_destroy(dev->link_topo);
> +#if IS_ENABLED(CONFIG_PHYLIB)
> + netdev_free_phy_link_topology(dev);
> +#endif
>
Then the conditional compiling can be removed here.

> /* Compatibility with error handling in drivers */
> if (dev->reg_state == NETREG_UNINITIALIZED ||





2024-05-13 06:36:50

by Nathan Chancellor

[permalink] [raw]
Subject: Re: [PATCH net-next 0/2] Fix phy_link_topology initialization

Hi Maxime,

On Tue, May 07, 2024 at 12:28:19PM +0200, Maxime Chevallier wrote:
> Nathan and Heiner reported issues that occur when phylib and phy drivers
> built as modules expect the phy_link_topology to be initialized, due to
> wrong use of IS_REACHABLE.
>
> This small fixup series addresses that by moving the initialization code
> into net/core/dev.c, but at the same time implementing lazy
> initialization to only allocate the topology upon the first PHY
> insertion.
>
> This needed some refactoring, namely pass the netdevice itself as a
> parameter for phy_link_topology helpers.
>
> Thanks Heiner for the help on untangling this, and Nathan for the
> report.

Are you able to prioritize getting this series merged? This has been a
problem in -next for over a month now and the merge window is now open.
I would hate to see this regress in mainline, as my main system may be
affected by it (not sure, I got a new test machine that got bit by it in
addition to the other two I noticed it on).

Cheers,
Nathan

2024-05-13 07:31:03

by Maxime Chevallier

[permalink] [raw]
Subject: Re: [PATCH net-next 2/2] net: phy: phy_link_topology: Lazy-initialize the link topology

Hi Heiner,

On Wed, 8 May 2024 07:44:22 +0200
Heiner Kallweit <[email protected]> wrote:

> On 07.05.2024 12:28, Maxime Chevallier wrote:
> > Having the net_device's init path for the link_topology depend on
> > IS_REACHABLE(PHYLIB)-protected helpers triggers errors when modules are being
> > built with phylib as a module as-well, as they expect netdev->link_topo
> > to be initialized.
> >
> > Move the link_topo initialization at the first PHY insertion, which will
> > both improve the memory usage, and make the behaviour more predicatble
> > and robust.
> >
> > Signed-off-by: Maxime Chevallier <[email protected]>
> > Fixes: 6916e461e793 ("net: phy: Introduce ethernet link topology representation")
> > Closes: https://lore.kernel.org/netdev/[email protected]/
> > Closes: https://lore.kernel.org/netdev/[email protected]/
> > ---
> > drivers/net/phy/phy_link_topology.c | 31 ++++++---------------
> > include/linux/netdevice.h | 2 ++
> > include/linux/phy_link_topology.h | 23 ++++++++--------
> > include/linux/phy_link_topology_core.h | 23 +++-------------
> > net/core/dev.c | 38 ++++++++++++++++++++++----
> > 5 files changed, 58 insertions(+), 59 deletions(-)
> >
> > diff --git a/drivers/net/phy/phy_link_topology.c b/drivers/net/phy/phy_link_topology.c
> > index 0e36bd7c15dc..b1aba9313e73 100644
> > --- a/drivers/net/phy/phy_link_topology.c
> > +++ b/drivers/net/phy/phy_link_topology.c
> > @@ -12,29 +12,6 @@
> > #include <linux/rtnetlink.h>
> > #include <linux/xarray.h>
> >
> > -struct phy_link_topology *phy_link_topo_create(struct net_device *dev)
> > -{
> > - struct phy_link_topology *topo;
> > -
> > - topo = kzalloc(sizeof(*topo), GFP_KERNEL);
> > - if (!topo)
> > - return ERR_PTR(-ENOMEM);
> > -
> > - xa_init_flags(&topo->phys, XA_FLAGS_ALLOC1);
> > - topo->next_phy_index = 1;
> > -
> > - return topo;
> > -}
> > -
> > -void phy_link_topo_destroy(struct phy_link_topology *topo)
> > -{
> > - if (!topo)
> > - return;
> > -
> > - xa_destroy(&topo->phys);
> > - kfree(topo);
> > -}
> > -
> > int phy_link_topo_add_phy(struct net_device *dev,
> > struct phy_device *phy,
> > enum phy_upstream upt, void *upstream)
> > @@ -43,6 +20,14 @@ int phy_link_topo_add_phy(struct net_device *dev,
> > struct phy_device_node *pdn;
> > int ret;
> >
> > + if (!topo) {
> > + ret = netdev_alloc_phy_link_topology(dev);
>
> This function is implemented in net core, but used only here.
> So move the implementation here?

If it's OK not to have both helpers to alloc and destroy in different
files, then I'll move it :)

>
> > + if (ret)
> > + return ret;
> > +
> > + topo = dev->link_topo;
> > + }
> > +
> > pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
> > if (!pdn)
> > return -ENOMEM;
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index cf261fb89d73..25a0a77cfadc 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -4569,6 +4569,8 @@ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
> > const unsigned char *));
> > void __hw_addr_init(struct netdev_hw_addr_list *list);
> >
> > +int netdev_alloc_phy_link_topology(struct net_device *dev);
> > +
> > /* Functions used for device addresses handling */
> > void dev_addr_mod(struct net_device *dev, unsigned int offset,
> > const void *addr, size_t len);
> > diff --git a/include/linux/phy_link_topology.h b/include/linux/phy_link_topology.h
> > index 166a01710aa2..3501f9a9e932 100644
> > --- a/include/linux/phy_link_topology.h
> > +++ b/include/linux/phy_link_topology.h
> > @@ -32,10 +32,12 @@ struct phy_device_node {
> > struct phy_device *phy;
> > };
> >
> > -struct phy_link_topology {
> > - struct xarray phys;
> > - u32 next_phy_index;
> > -};
> > +#if IS_ENABLED(CONFIG_PHYLIB)
> > +int phy_link_topo_add_phy(struct net_device *dev,
> > + struct phy_device *phy,
> > + enum phy_upstream upt, void *upstream);
> > +
> > +void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy);
> >
> > static inline struct phy_device
> > *phy_link_topo_get_phy(struct net_device *dev, u32 phyindex)
> > @@ -53,13 +55,6 @@ static inline struct phy_device
> > return NULL;
> > }
> >
> > -#if IS_REACHABLE(CONFIG_PHYLIB)
> > -int phy_link_topo_add_phy(struct net_device *dev,
> > - struct phy_device *phy,
> > - enum phy_upstream upt, void *upstream);
> > -
> > -void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy);
> > -
> > #else
> > static inline int phy_link_topo_add_phy(struct net_device *dev,
> > struct phy_device *phy,
> > @@ -72,6 +67,12 @@ static inline void phy_link_topo_del_phy(struct net_device *dev,
> > struct phy_device *phy)
> > {
> > }
> > +
> > +static inline struct phy_device *
> > +phy_link_topo_get_phy(struct net_device *dev, u32 phyindex)
> > +{
> > + return NULL;
> > +}
> > #endif
> >
> > #endif /* __PHY_LINK_TOPOLOGY_H */
> > diff --git a/include/linux/phy_link_topology_core.h b/include/linux/phy_link_topology_core.h
> > index 0a6479055745..f9c0520806fb 100644
> > --- a/include/linux/phy_link_topology_core.h
> > +++ b/include/linux/phy_link_topology_core.h
> > @@ -2,24 +2,9 @@
> > #ifndef __PHY_LINK_TOPOLOGY_CORE_H
> > #define __PHY_LINK_TOPOLOGY_CORE_H
> >
> > -struct phy_link_topology;
> > -
> > -#if IS_REACHABLE(CONFIG_PHYLIB)
> > -
> > -struct phy_link_topology *phy_link_topo_create(struct net_device *dev);
> > -void phy_link_topo_destroy(struct phy_link_topology *topo);
> > -
> > -#else
> > -
> > -static inline struct phy_link_topology *phy_link_topo_create(struct net_device *dev)
> > -{
> > - return NULL;
> > -}
> > -
> > -static inline void phy_link_topo_destroy(struct phy_link_topology *topo)
> > -{
> > -}
> > -
> > -#endif
> > +struct phy_link_topology {
> > + struct xarray phys;
> > + u32 next_phy_index;
> > +};
> >
> This is all which is left in this header. As this header is public anyway,
> better move this definition to phy_link_topology.h?

Well I'll have to include the whole phy_link_topology.h in
net/core/dev.c, and I was trying to avoid including that whole header,
and keep the included content to a bare minimum.

>
> > #endif /* __PHY_LINK_TOPOLOGY_CORE_H */
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index d2ce91a334c1..1b4ffc273a04 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -10256,6 +10256,35 @@ static void netdev_do_free_pcpu_stats(struct net_device *dev)
> > }
> > }
> >
> > +int netdev_alloc_phy_link_topology(struct net_device *dev)
> > +{
> > + struct phy_link_topology *topo;
> > +
> > + topo = kzalloc(sizeof(*topo), GFP_KERNEL);
> > + if (!topo)
> > + return -ENOMEM;
> > +
> > + xa_init_flags(&topo->phys, XA_FLAGS_ALLOC1);
> > + topo->next_phy_index = 1;
> > +
> > + dev->link_topo = topo;
> > +
> > + return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(netdev_alloc_phy_link_topology);
> > +
> > +static void netdev_free_phy_link_topology(struct net_device *dev)
> > +{
> > + struct phy_link_topology *topo = dev->link_topo;
> > +
> > + if (!topo)
> > + return;
> > +
> > + xa_destroy(&topo->phys);
> > + kfree(topo);
> > + dev->link_topo = NULL;
>
> Give the compiler a chance to remove this function if
> CONFIG_PHYLIB isn't enabled.
>
> if (IS_ENABLED(CONFIG_PHYLIB) && topo) {
> xa_destroy(&topo->phys);
> kfree(topo);
> dev->link_topo = NULL;
> }

Well if we add more things to the link topology, then it's going to be
easy to forget updating that without clear helpers for alloc/destroy,
don't you think ?

I can try to squeeze another iteration before net-next closes.

Maxime

> > +}
> > +
> > /**
> > * register_netdevice() - register a network device
> > * @dev: device to register
> > @@ -10998,11 +11027,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
> > #ifdef CONFIG_NET_SCHED
> > hash_init(dev->qdisc_hash);
> > #endif
> > - dev->link_topo = phy_link_topo_create(dev);
> > - if (IS_ERR(dev->link_topo)) {
> > - dev->link_topo = NULL;
> > - goto free_all;
> > - }
> >
> > dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
> > setup(dev);
> > @@ -11092,7 +11116,9 @@ void free_netdev(struct net_device *dev)
> > free_percpu(dev->xdp_bulkq);
> > dev->xdp_bulkq = NULL;
> >
> > - phy_link_topo_destroy(dev->link_topo);
> > +#if IS_ENABLED(CONFIG_PHYLIB)
> > + netdev_free_phy_link_topology(dev);
> > +#endif
> >
> Then the conditional compiling can be removed here.
>
> > /* Compatibility with error handling in drivers */
> > if (dev->reg_state == NETREG_UNINITIALIZED ||
>
>
>
>


2024-05-13 08:07:47

by Maxime Chevallier

[permalink] [raw]
Subject: Re: [PATCH net-next 2/2] net: phy: phy_link_topology: Lazy-initialize the link topology

Hello again Heiner,

On Wed, 8 May 2024 07:44:22 +0200
Heiner Kallweit <[email protected]> wrote:

> On 07.05.2024 12:28, Maxime Chevallier wrote:
> > Having the net_device's init path for the link_topology depend on
> > IS_REACHABLE(PHYLIB)-protected helpers triggers errors when modules are being
> > built with phylib as a module as-well, as they expect netdev->link_topo
> > to be initialized.
> >
> > Move the link_topo initialization at the first PHY insertion, which will
> > both improve the memory usage, and make the behaviour more predicatble
> > and robust.

I agree with some of the comments, as stated in my previous mail,
however I'm struggling to find the time to fix, and re-test everything,
especially before net-next closes. Would it be OK if I re-send with a
fix for the kbuild bot warning, improve the commit log as you
mentionned for patch 1 so that at least the issue can be solved ?

I still have the netlink part of this work to send, so I definitely
will have to rework that, but with a bit less time constraints so that
I can properly re-test everything.

Best regards,

Maxime

2024-05-13 08:16:12

by Maxime Chevallier

[permalink] [raw]
Subject: Re: [PATCH net-next 2/2] net: phy: phy_link_topology: Lazy-initialize the link topology

On Mon, 13 May 2024 10:07:29 +0200
Maxime Chevallier <[email protected]> wrote:

> Hello again Heiner,
>
> On Wed, 8 May 2024 07:44:22 +0200
> Heiner Kallweit <[email protected]> wrote:
>
> > On 07.05.2024 12:28, Maxime Chevallier wrote:
> > > Having the net_device's init path for the link_topology depend on
> > > IS_REACHABLE(PHYLIB)-protected helpers triggers errors when modules are being
> > > built with phylib as a module as-well, as they expect netdev->link_topo
> > > to be initialized.
> > >
> > > Move the link_topo initialization at the first PHY insertion, which will
> > > both improve the memory usage, and make the behaviour more predicatble
> > > and robust.
>
> I agree with some of the comments, as stated in my previous mail,
> however I'm struggling to find the time to fix, and re-test everything,
> especially before net-next closes. Would it be OK if I re-send with a
> fix for the kbuild bot warning, improve the commit log as you
> mentionned for patch 1 so that at least the issue can be solved ?
>
> I still have the netlink part of this work to send, so I definitely
> will have to rework that, but with a bit less time constraints so that
> I can properly re-test everything.

To clarify, I'm mostly talking about the merge of
phy_link_topology_core.h into phy_link_topology.h, I fear that this
could get rejected because of the added #include that would clutter a
bit net/core/dev.c with functions that are barely used.

All your other comments make perfect sense to me and I'm testing these
as we speak.

Regards,

Maxime

>
> Best regards,
>
> Maxime


2024-05-13 09:16:28

by Russell King (Oracle)

[permalink] [raw]
Subject: Re: [PATCH net-next 0/2] Fix phy_link_topology initialization

On Sun, May 12, 2024 at 11:36:36PM -0700, Nathan Chancellor wrote:
> Hi Maxime,
>
> On Tue, May 07, 2024 at 12:28:19PM +0200, Maxime Chevallier wrote:
> > Nathan and Heiner reported issues that occur when phylib and phy drivers
> > built as modules expect the phy_link_topology to be initialized, due to
> > wrong use of IS_REACHABLE.
> >
> > This small fixup series addresses that by moving the initialization code
> > into net/core/dev.c, but at the same time implementing lazy
> > initialization to only allocate the topology upon the first PHY
> > insertion.
> >
> > This needed some refactoring, namely pass the netdevice itself as a
> > parameter for phy_link_topology helpers.
> >
> > Thanks Heiner for the help on untangling this, and Nathan for the
> > report.
>
> Are you able to prioritize getting this series merged? This has been a
> problem in -next for over a month now and the merge window is now open.
> I would hate to see this regress in mainline, as my main system may be
> affected by it (not sure, I got a new test machine that got bit by it in
> addition to the other two I noticed it on).

.. and Maxime has been working on trying to get an acceptable fix for
it over that time, with to-and-fro discussions. Maxime still hasn't got
an ack from Heiner for the fixes, and changes are still being
requested.

I think, sadly, the only way forward at this point would be to revert
the original commit. I've just tried reverting 6916e461e793 in my
net-next tree and it's possible, although a little noisy:

$ git revert 6916e461e793
Performing inexact rename detection: 100% (8904/8904), done.
Auto-merging net/core/dev.c
Auto-merging include/uapi/linux/ethtool.h
Removing include/linux/phy_link_topology_core.h
Removing include/linux/phy_link_topology.h
Auto-merging include/linux/phy.h
Auto-merging include/linux/netdevice.h
Removing drivers/net/phy/phy_link_topology.c
Auto-merging drivers/net/phy/phy_device.c
Auto-merging MAINTAINERS
hint: Waiting for your editor to close the file...

I haven't checked whether that ends up with something that's buildable.

Any views Jakub/Dave/Paolo?

--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!

2024-05-13 15:11:50

by Jakub Kicinski

[permalink] [raw]
Subject: Re: [PATCH net-next 0/2] Fix phy_link_topology initialization

On Mon, 13 May 2024 10:15:48 +0100 Russell King (Oracle) wrote:
> ... and Maxime has been working on trying to get an acceptable fix for
> it over that time, with to-and-fro discussions. Maxime still hasn't got
> an ack from Heiner for the fixes, and changes are still being
> requested.
>
> I think, sadly, the only way forward at this point would be to revert
> the original commit. I've just tried reverting 6916e461e793 in my
> net-next tree and it's possible, although a little noisy:
>
> $ git revert 6916e461e793
> Performing inexact rename detection: 100% (8904/8904), done.
> Auto-merging net/core/dev.c
> Auto-merging include/uapi/linux/ethtool.h
> Removing include/linux/phy_link_topology_core.h
> Removing include/linux/phy_link_topology.h
> Auto-merging include/linux/phy.h
> Auto-merging include/linux/netdevice.h
> Removing drivers/net/phy/phy_link_topology.c
> Auto-merging drivers/net/phy/phy_device.c
> Auto-merging MAINTAINERS
> hint: Waiting for your editor to close the file...
>
> I haven't checked whether that ends up with something that's buildable.
>
> Any views Jakub/Dave/Paolo?

I think you're right. The series got half-merged, we shouldn't push it
into a release in this state. We should revert all of it, I reckon?

6916e461e793 ("net: phy: Introduce ethernet link topology representation")
0ec5ed6c130e ("net: sfp: pass the phy_device when disconnecting an sfp module's PHY")
e75e4e074c44 ("net: phy: add helpers to handle sfp phy connect/disconnect")
fdd353965b52 ("net: sfp: Add helper to return the SFP bus name")
841942bc6212 ("net: ethtool: Allow passing a phy index for some commands")

Does anyone feel strongly that we should try to patch it up instead?

2024-05-14 06:46:45

by Maxime Chevallier

[permalink] [raw]
Subject: Re: [PATCH net-next 0/2] Fix phy_link_topology initialization

Hi Jakub,

On Mon, 13 May 2024 08:11:38 -0700
Jakub Kicinski <[email protected]> wrote:

> On Mon, 13 May 2024 10:15:48 +0100 Russell King (Oracle) wrote:
> > ... and Maxime has been working on trying to get an acceptable fix for
> > it over that time, with to-and-fro discussions. Maxime still hasn't got
> > an ack from Heiner for the fixes, and changes are still being
> > requested.
> >
> > I think, sadly, the only way forward at this point would be to revert
> > the original commit. I've just tried reverting 6916e461e793 in my
> > net-next tree and it's possible, although a little noisy:
> >
> > $ git revert 6916e461e793
> > Performing inexact rename detection: 100% (8904/8904), done.
> > Auto-merging net/core/dev.c
> > Auto-merging include/uapi/linux/ethtool.h
> > Removing include/linux/phy_link_topology_core.h
> > Removing include/linux/phy_link_topology.h
> > Auto-merging include/linux/phy.h
> > Auto-merging include/linux/netdevice.h
> > Removing drivers/net/phy/phy_link_topology.c
> > Auto-merging drivers/net/phy/phy_device.c
> > Auto-merging MAINTAINERS
> > hint: Waiting for your editor to close the file...
> >
> > I haven't checked whether that ends up with something that's buildable.
> >
> > Any views Jakub/Dave/Paolo?
>
> I think you're right. The series got half-merged, we shouldn't push it
> into a release in this state. We should revert all of it, I reckon?
>
> 6916e461e793 ("net: phy: Introduce ethernet link topology representation")
> 0ec5ed6c130e ("net: sfp: pass the phy_device when disconnecting an sfp module's PHY")
> e75e4e074c44 ("net: phy: add helpers to handle sfp phy connect/disconnect")
> fdd353965b52 ("net: sfp: Add helper to return the SFP bus name")
> 841942bc6212 ("net: ethtool: Allow passing a phy index for some commands")
>
> Does anyone feel strongly that we should try to patch it up instead?

It's OK for me, at least this showed some of the shortcomings with the
current code, let's come back with a better version for the next round.

Thanks,

Maxime