Add an auxiliary virtual bus to model the mlx4 driver structure. The
code is added along the current custom device management logic.
Subsequent patches switch mlx4_en and mlx4_ib to the auxiliary bus and
the old interface is then removed.
Structure mlx4_priv gains a new adev dynamic array to keep track of its
auxiliary devices. Access to the array is protected by the global
mlx4_intf mutex.
Functions mlx4_register_device() and mlx4_unregister_device() are
updated to expose auxiliary devices on the bus in order to load mlx4_en
and/or mlx4_ib. Functions mlx4_register_auxiliary_driver() and
mlx4_unregister_auxiliary_driver() are added to substitute
mlx4_register_interface() and mlx4_unregister_interface(), respectively.
Function mlx4_do_bond() is adjusted to walk over the adev array and
re-adds a specific auxiliary device if its driver sets the
MLX4_INTFF_BONDING flag.
Signed-off-by: Petr Pavlu <[email protected]>
Tested-by: Leon Romanovsky <[email protected]>
---
drivers/net/ethernet/mellanox/mlx4/Kconfig | 1 +
drivers/net/ethernet/mellanox/mlx4/intf.c | 230 ++++++++++++++++++++-
drivers/net/ethernet/mellanox/mlx4/main.c | 17 +-
drivers/net/ethernet/mellanox/mlx4/mlx4.h | 6 +
include/linux/mlx4/device.h | 7 +
include/linux/mlx4/driver.h | 11 +
6 files changed, 268 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
index 1b4b1f642317..825e05fb8607 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
@@ -27,6 +27,7 @@ config MLX4_EN_DCB
config MLX4_CORE
tristate
depends on PCI
+ select AUXILIARY_BUS
select NET_DEVLINK
default n
diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c
index 30aead34ce08..4b1e18e4a682 100644
--- a/drivers/net/ethernet/mellanox/mlx4/intf.c
+++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
@@ -48,6 +48,89 @@ struct mlx4_device_context {
static LIST_HEAD(intf_list);
static LIST_HEAD(dev_list);
static DEFINE_MUTEX(intf_mutex);
+static DEFINE_IDA(mlx4_adev_ida);
+
+static const struct mlx4_adev_device {
+ const char *suffix;
+ bool (*is_supported)(struct mlx4_dev *dev);
+} mlx4_adev_devices[1] = {};
+
+int mlx4_adev_init(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ priv->adev_idx = ida_alloc(&mlx4_adev_ida, GFP_KERNEL);
+ if (priv->adev_idx < 0)
+ return priv->adev_idx;
+
+ priv->adev = kcalloc(ARRAY_SIZE(mlx4_adev_devices),
+ sizeof(struct mlx4_adev *), GFP_KERNEL);
+ if (!priv->adev) {
+ ida_free(&mlx4_adev_ida, priv->adev_idx);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void mlx4_adev_cleanup(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ kfree(priv->adev);
+ ida_free(&mlx4_adev_ida, priv->adev_idx);
+}
+
+static void adev_release(struct device *dev)
+{
+ struct mlx4_adev *mlx4_adev =
+ container_of(dev, struct mlx4_adev, adev.dev);
+ struct mlx4_priv *priv = mlx4_priv(mlx4_adev->mdev);
+ int idx = mlx4_adev->idx;
+
+ kfree(mlx4_adev);
+ priv->adev[idx] = NULL;
+}
+
+static struct mlx4_adev *add_adev(struct mlx4_dev *dev, int idx)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ const char *suffix = mlx4_adev_devices[idx].suffix;
+ struct auxiliary_device *adev;
+ struct mlx4_adev *madev;
+ int ret;
+
+ madev = kzalloc(sizeof(*madev), GFP_KERNEL);
+ if (!madev)
+ return ERR_PTR(-ENOMEM);
+
+ adev = &madev->adev;
+ adev->id = priv->adev_idx;
+ adev->name = suffix;
+ adev->dev.parent = &dev->persist->pdev->dev;
+ adev->dev.release = adev_release;
+ madev->mdev = dev;
+ madev->idx = idx;
+
+ ret = auxiliary_device_init(adev);
+ if (ret) {
+ kfree(madev);
+ return ERR_PTR(ret);
+ }
+
+ ret = auxiliary_device_add(adev);
+ if (ret) {
+ auxiliary_device_uninit(adev);
+ return ERR_PTR(ret);
+ }
+ return madev;
+}
+
+static void del_adev(struct auxiliary_device *adev)
+{
+ auxiliary_device_delete(adev);
+ auxiliary_device_uninit(adev);
+}
static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
{
@@ -120,12 +203,24 @@ void mlx4_unregister_interface(struct mlx4_interface *intf)
}
EXPORT_SYMBOL_GPL(mlx4_unregister_interface);
+int mlx4_register_auxiliary_driver(struct mlx4_adrv *madrv)
+{
+ return auxiliary_driver_register(&madrv->adrv);
+}
+EXPORT_SYMBOL_GPL(mlx4_register_auxiliary_driver);
+
+void mlx4_unregister_auxiliary_driver(struct mlx4_adrv *madrv)
+{
+ auxiliary_driver_unregister(&madrv->adrv);
+}
+EXPORT_SYMBOL_GPL(mlx4_unregister_auxiliary_driver);
+
int mlx4_do_bond(struct mlx4_dev *dev, bool enable)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_device_context *dev_ctx = NULL, *temp_dev_ctx;
unsigned long flags;
- int ret;
+ int i, ret;
LIST_HEAD(bond_list);
if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
@@ -177,6 +272,57 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable)
dev_ctx->intf->protocol, enable ?
"enabled" : "disabled");
}
+
+ mutex_lock(&intf_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(mlx4_adev_devices); i++) {
+ struct mlx4_adev *madev = priv->adev[i];
+ struct mlx4_adrv *madrv;
+ enum mlx4_protocol protocol;
+
+ if (!madev)
+ continue;
+
+ device_lock(&madev->adev.dev);
+ if (!madev->adev.dev.driver) {
+ device_unlock(&madev->adev.dev);
+ continue;
+ }
+
+ madrv = container_of(madev->adev.dev.driver, struct mlx4_adrv,
+ adrv.driver);
+ if (!(madrv->flags & MLX4_INTFF_BONDING)) {
+ device_unlock(&madev->adev.dev);
+ continue;
+ }
+
+ if (mlx4_is_mfunc(dev)) {
+ mlx4_dbg(dev,
+ "SRIOV, disabled HA mode for intf proto %d\n",
+ madrv->protocol);
+ device_unlock(&madev->adev.dev);
+ continue;
+ }
+
+ protocol = madrv->protocol;
+ device_unlock(&madev->adev.dev);
+
+ del_adev(&madev->adev);
+ priv->adev[i] = add_adev(dev, i);
+ if (IS_ERR(priv->adev[i])) {
+ mlx4_warn(dev, "Device[%d] (%s) failed to load\n", i,
+ mlx4_adev_devices[i].suffix);
+ priv->adev[i] = NULL;
+ continue;
+ }
+
+ mlx4_dbg(dev,
+ "Interface for protocol %d restarted with bonded mode %s\n",
+ protocol, enable ? "enabled" : "disabled");
+ }
+
+ mutex_unlock(&intf_mutex);
+
return 0;
}
@@ -206,10 +352,80 @@ int mlx4_unregister_event_notifier(struct mlx4_dev *dev,
}
EXPORT_SYMBOL(mlx4_unregister_event_notifier);
+static int add_drivers(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, ret = 0;
+
+ for (i = 0; i < ARRAY_SIZE(mlx4_adev_devices); i++) {
+ bool is_supported = false;
+
+ if (priv->adev[i])
+ continue;
+
+ if (mlx4_adev_devices[i].is_supported)
+ is_supported = mlx4_adev_devices[i].is_supported(dev);
+
+ if (!is_supported)
+ continue;
+
+ priv->adev[i] = add_adev(dev, i);
+ if (IS_ERR(priv->adev[i])) {
+ mlx4_warn(dev, "Device[%d] (%s) failed to load\n", i,
+ mlx4_adev_devices[i].suffix);
+ /* We continue to rescan drivers and leave to the caller
+ * to make decision if to release everything or
+ * continue. */
+ ret = PTR_ERR(priv->adev[i]);
+ priv->adev[i] = NULL;
+ }
+ }
+ return ret;
+}
+
+static void delete_drivers(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ bool delete_all;
+ int i;
+
+ delete_all = !(dev->persist->interface_state & MLX4_INTERFACE_STATE_UP);
+
+ for (i = ARRAY_SIZE(mlx4_adev_devices) - 1; i >= 0; i--) {
+ bool is_supported = false;
+
+ if (!priv->adev[i])
+ continue;
+
+ if (mlx4_adev_devices[i].is_supported && !delete_all)
+ is_supported = mlx4_adev_devices[i].is_supported(dev);
+
+ if (is_supported)
+ continue;
+
+ del_adev(&priv->adev[i]->adev);
+ priv->adev[i] = NULL;
+ }
+}
+
+/* This function is used after mlx4_dev is reconfigured.
+ */
+static int rescan_drivers_locked(struct mlx4_dev *dev)
+{
+ lockdep_assert_held(&intf_mutex);
+
+ delete_drivers(dev);
+ if (!(dev->persist->interface_state & MLX4_INTERFACE_STATE_UP))
+ return 0;
+
+ return add_drivers(dev);
+}
+
int mlx4_register_device(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_interface *intf;
+ int ret;
mutex_lock(&intf_mutex);
@@ -218,10 +434,18 @@ int mlx4_register_device(struct mlx4_dev *dev)
list_for_each_entry(intf, &intf_list, list)
mlx4_add_device(intf, priv);
+ ret = rescan_drivers_locked(dev);
+
mutex_unlock(&intf_mutex);
+
+ if (ret) {
+ mlx4_unregister_device(dev);
+ return ret;
+ }
+
mlx4_start_catas_poll(dev);
- return 0;
+ return ret;
}
void mlx4_unregister_device(struct mlx4_dev *dev)
@@ -253,6 +477,8 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
list_del(&priv->dev_list);
dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP;
+ rescan_drivers_locked(dev);
+
mutex_unlock(&intf_mutex);
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 0ed490b99163..c4ec7377aa71 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3429,6 +3429,10 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
+ err = mlx4_adev_init(dev);
+ if (err)
+ return err;
+
ATOMIC_INIT_NOTIFIER_HEAD(&priv->event_nh);
mutex_init(&priv->port_mutex);
@@ -3455,10 +3459,11 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
err = mlx4_get_ownership(dev);
if (err) {
if (err < 0)
- return err;
+ goto err_adev;
else {
mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
- return -EINVAL;
+ err = -EINVAL;
+ goto err_adev;
}
}
@@ -3806,6 +3811,9 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
mlx4_free_ownership(dev);
kfree(dev_cap);
+
+err_adev:
+ mlx4_adev_cleanup(dev);
return err;
}
@@ -4186,6 +4194,8 @@ static void mlx4_unload_one(struct pci_dev *pdev)
mlx4_slave_destroy_special_qp_cap(dev);
kfree(dev->dev_vfs);
+ mlx4_adev_cleanup(dev);
+
mlx4_clean_dev(dev);
priv->pci_dev_data = pci_dev_data;
priv->removed = 1;
@@ -4573,6 +4583,9 @@ static int __init mlx4_init(void)
{
int ret;
+ WARN_ONCE(strcmp(MLX4_ADEV_NAME, KBUILD_MODNAME),
+ "mlx4_core name not in sync with kernel module name");
+
if (mlx4_verify_params())
return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index ece9acb6a869..d5050bfb342f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -47,6 +47,7 @@
#include <linux/spinlock.h>
#include <net/devlink.h>
#include <linux/rwsem.h>
+#include <linux/auxiliary_bus.h>
#include <linux/notifier.h>
#include <linux/mlx4/device.h>
@@ -884,6 +885,8 @@ struct mlx4_priv {
struct list_head dev_list;
struct list_head ctx_list;
spinlock_t ctx_lock;
+ struct mlx4_adev **adev;
+ int adev_idx;
struct atomic_notifier_head event_nh;
int pci_dev_data;
@@ -1052,6 +1055,9 @@ void mlx4_catas_end(struct mlx4_dev *dev);
int mlx4_crdump_init(struct mlx4_dev *dev);
void mlx4_crdump_end(struct mlx4_dev *dev);
int mlx4_restart_one(struct pci_dev *pdev);
+
+int mlx4_adev_init(struct mlx4_dev *dev);
+void mlx4_adev_cleanup(struct mlx4_dev *dev);
int mlx4_register_device(struct mlx4_dev *dev);
void mlx4_unregister_device(struct mlx4_dev *dev);
void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 049d8a4b044d..27f42f713c89 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -33,6 +33,7 @@
#ifndef MLX4_DEVICE_H
#define MLX4_DEVICE_H
+#include <linux/auxiliary_bus.h>
#include <linux/if_ether.h>
#include <linux/pci.h>
#include <linux/completion.h>
@@ -889,6 +890,12 @@ struct mlx4_dev {
u8 uar_page_shift;
};
+struct mlx4_adev {
+ struct auxiliary_device adev;
+ struct mlx4_dev *mdev;
+ int idx;
+};
+
struct mlx4_clock_params {
u64 offset;
u8 bar;
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index 781d5a0c2faa..9cf157d381c6 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -34,9 +34,12 @@
#define MLX4_DRIVER_H
#include <net/devlink.h>
+#include <linux/auxiliary_bus.h>
#include <linux/notifier.h>
#include <linux/mlx4/device.h>
+#define MLX4_ADEV_NAME "mlx4_core"
+
struct mlx4_dev;
#define MLX4_MAC_MASK 0xffffffffffffULL
@@ -63,8 +66,16 @@ struct mlx4_interface {
int flags;
};
+struct mlx4_adrv {
+ struct auxiliary_driver adrv;
+ enum mlx4_protocol protocol;
+ int flags;
+};
+
int mlx4_register_interface(struct mlx4_interface *intf);
void mlx4_unregister_interface(struct mlx4_interface *intf);
+int mlx4_register_auxiliary_driver(struct mlx4_adrv *madrv);
+void mlx4_unregister_auxiliary_driver(struct mlx4_adrv *madrv);
int mlx4_register_event_notifier(struct mlx4_dev *dev,
struct notifier_block *nb);
--
2.35.3
在 2023/8/4 23:05, Petr Pavlu 写道:
> Add an auxiliary virtual bus to model the mlx4 driver structure. The
> code is added along the current custom device management logic.
> Subsequent patches switch mlx4_en and mlx4_ib to the auxiliary bus and
> the old interface is then removed.
>
> Structure mlx4_priv gains a new adev dynamic array to keep track of its
> auxiliary devices. Access to the array is protected by the global
> mlx4_intf mutex.
>
> Functions mlx4_register_device() and mlx4_unregister_device() are
> updated to expose auxiliary devices on the bus in order to load mlx4_en
> and/or mlx4_ib. Functions mlx4_register_auxiliary_driver() and
> mlx4_unregister_auxiliary_driver() are added to substitute
> mlx4_register_interface() and mlx4_unregister_interface(), respectively.
> Function mlx4_do_bond() is adjusted to walk over the adev array and
> re-adds a specific auxiliary device if its driver sets the
> MLX4_INTFF_BONDING flag.
>
> Signed-off-by: Petr Pavlu <[email protected]>
> Tested-by: Leon Romanovsky <[email protected]>
> ---
> drivers/net/ethernet/mellanox/mlx4/Kconfig | 1 +
> drivers/net/ethernet/mellanox/mlx4/intf.c | 230 ++++++++++++++++++++-
> drivers/net/ethernet/mellanox/mlx4/main.c | 17 +-
> drivers/net/ethernet/mellanox/mlx4/mlx4.h | 6 +
> include/linux/mlx4/device.h | 7 +
> include/linux/mlx4/driver.h | 11 +
> 6 files changed, 268 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
> index 1b4b1f642317..825e05fb8607 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig
> +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
> @@ -27,6 +27,7 @@ config MLX4_EN_DCB
> config MLX4_CORE
> tristate
> depends on PCI
> + select AUXILIARY_BUS
> select NET_DEVLINK
> default n
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c
> index 30aead34ce08..4b1e18e4a682 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/intf.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
> @@ -48,6 +48,89 @@ struct mlx4_device_context {
> static LIST_HEAD(intf_list);
> static LIST_HEAD(dev_list);
> static DEFINE_MUTEX(intf_mutex);
> +static DEFINE_IDA(mlx4_adev_ida);
> +
> +static const struct mlx4_adev_device {
> + const char *suffix;
> + bool (*is_supported)(struct mlx4_dev *dev);
> +} mlx4_adev_devices[1] = {};
> +
> +int mlx4_adev_init(struct mlx4_dev *dev)
> +{
> + struct mlx4_priv *priv = mlx4_priv(dev);
> +
> + priv->adev_idx = ida_alloc(&mlx4_adev_ida, GFP_KERNEL);
> + if (priv->adev_idx < 0)
> + return priv->adev_idx;
> +
> + priv->adev = kcalloc(ARRAY_SIZE(mlx4_adev_devices),
> + sizeof(struct mlx4_adev *), GFP_KERNEL);
> + if (!priv->adev) {
> + ida_free(&mlx4_adev_ida, priv->adev_idx);
> + return -ENOMEM;
> + }
> +
> + return 0;
> +}
> +
> +void mlx4_adev_cleanup(struct mlx4_dev *dev)
> +{
> + struct mlx4_priv *priv = mlx4_priv(dev);
> +
> + kfree(priv->adev);
> + ida_free(&mlx4_adev_ida, priv->adev_idx);
> +}
> +
> +static void adev_release(struct device *dev)
> +{
> + struct mlx4_adev *mlx4_adev =
> + container_of(dev, struct mlx4_adev, adev.dev);
> + struct mlx4_priv *priv = mlx4_priv(mlx4_adev->mdev);
> + int idx = mlx4_adev->idx;
> +
> + kfree(mlx4_adev);
> + priv->adev[idx] = NULL;
> +}
> +
> +static struct mlx4_adev *add_adev(struct mlx4_dev *dev, int idx)
> +{
> + struct mlx4_priv *priv = mlx4_priv(dev);
> + const char *suffix = mlx4_adev_devices[idx].suffix;
> + struct auxiliary_device *adev;
> + struct mlx4_adev *madev;
> + int ret;
> +
> + madev = kzalloc(sizeof(*madev), GFP_KERNEL);
> + if (!madev)
> + return ERR_PTR(-ENOMEM);
> +
> + adev = &madev->adev;
> + adev->id = priv->adev_idx;
> + adev->name = suffix;
> + adev->dev.parent = &dev->persist->pdev->dev;
> + adev->dev.release = adev_release;
> + madev->mdev = dev;
> + madev->idx = idx;
> +
> + ret = auxiliary_device_init(adev);
> + if (ret) {
> + kfree(madev);
> + return ERR_PTR(ret);
> + }
> +
> + ret = auxiliary_device_add(adev);
> + if (ret) {
madev is allocated, but it is not handled here when auxiliary_device_add
error. It should be freed, too?
That is, add "kfree(madev);" here?
If madev will be handled in other place, please add some comments here
to indicate madev is handled in other place.
Zhu Yanjun
> + auxiliary_device_uninit(adev);
> + return ERR_PTR(ret);
> + }
> + return madev;
> +}
> +
> +static void del_adev(struct auxiliary_device *adev)
> +{
> + auxiliary_device_delete(adev);
> + auxiliary_device_uninit(adev);
> +}
>
> static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
> {
> @@ -120,12 +203,24 @@ void mlx4_unregister_interface(struct mlx4_interface *intf)
> }
> EXPORT_SYMBOL_GPL(mlx4_unregister_interface);
>
> +int mlx4_register_auxiliary_driver(struct mlx4_adrv *madrv)
> +{
> + return auxiliary_driver_register(&madrv->adrv);
> +}
> +EXPORT_SYMBOL_GPL(mlx4_register_auxiliary_driver);
> +
> +void mlx4_unregister_auxiliary_driver(struct mlx4_adrv *madrv)
> +{
> + auxiliary_driver_unregister(&madrv->adrv);
> +}
> +EXPORT_SYMBOL_GPL(mlx4_unregister_auxiliary_driver);
> +
> int mlx4_do_bond(struct mlx4_dev *dev, bool enable)
> {
> struct mlx4_priv *priv = mlx4_priv(dev);
> struct mlx4_device_context *dev_ctx = NULL, *temp_dev_ctx;
> unsigned long flags;
> - int ret;
> + int i, ret;
> LIST_HEAD(bond_list);
>
> if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
> @@ -177,6 +272,57 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable)
> dev_ctx->intf->protocol, enable ?
> "enabled" : "disabled");
> }
> +
> + mutex_lock(&intf_mutex);
> +
> + for (i = 0; i < ARRAY_SIZE(mlx4_adev_devices); i++) {
> + struct mlx4_adev *madev = priv->adev[i];
> + struct mlx4_adrv *madrv;
> + enum mlx4_protocol protocol;
> +
> + if (!madev)
> + continue;
> +
> + device_lock(&madev->adev.dev);
> + if (!madev->adev.dev.driver) {
> + device_unlock(&madev->adev.dev);
> + continue;
> + }
> +
> + madrv = container_of(madev->adev.dev.driver, struct mlx4_adrv,
> + adrv.driver);
> + if (!(madrv->flags & MLX4_INTFF_BONDING)) {
> + device_unlock(&madev->adev.dev);
> + continue;
> + }
> +
> + if (mlx4_is_mfunc(dev)) {
> + mlx4_dbg(dev,
> + "SRIOV, disabled HA mode for intf proto %d\n",
> + madrv->protocol);
> + device_unlock(&madev->adev.dev);
> + continue;
> + }
> +
> + protocol = madrv->protocol;
> + device_unlock(&madev->adev.dev);
> +
> + del_adev(&madev->adev);
> + priv->adev[i] = add_adev(dev, i);
> + if (IS_ERR(priv->adev[i])) {
> + mlx4_warn(dev, "Device[%d] (%s) failed to load\n", i,
> + mlx4_adev_devices[i].suffix);
> + priv->adev[i] = NULL;
> + continue;
> + }
> +
> + mlx4_dbg(dev,
> + "Interface for protocol %d restarted with bonded mode %s\n",
> + protocol, enable ? "enabled" : "disabled");
> + }
> +
> + mutex_unlock(&intf_mutex);
> +
> return 0;
> }
>
> @@ -206,10 +352,80 @@ int mlx4_unregister_event_notifier(struct mlx4_dev *dev,
> }
> EXPORT_SYMBOL(mlx4_unregister_event_notifier);
>
> +static int add_drivers(struct mlx4_dev *dev)
> +{
> + struct mlx4_priv *priv = mlx4_priv(dev);
> + int i, ret = 0;
> +
> + for (i = 0; i < ARRAY_SIZE(mlx4_adev_devices); i++) {
> + bool is_supported = false;
> +
> + if (priv->adev[i])
> + continue;
> +
> + if (mlx4_adev_devices[i].is_supported)
> + is_supported = mlx4_adev_devices[i].is_supported(dev);
> +
> + if (!is_supported)
> + continue;
> +
> + priv->adev[i] = add_adev(dev, i);
> + if (IS_ERR(priv->adev[i])) {
> + mlx4_warn(dev, "Device[%d] (%s) failed to load\n", i,
> + mlx4_adev_devices[i].suffix);
> + /* We continue to rescan drivers and leave to the caller
> + * to make decision if to release everything or
> + * continue. */
> + ret = PTR_ERR(priv->adev[i]);
> + priv->adev[i] = NULL;
> + }
> + }
> + return ret;
> +}
> +
> +static void delete_drivers(struct mlx4_dev *dev)
> +{
> + struct mlx4_priv *priv = mlx4_priv(dev);
> + bool delete_all;
> + int i;
> +
> + delete_all = !(dev->persist->interface_state & MLX4_INTERFACE_STATE_UP);
> +
> + for (i = ARRAY_SIZE(mlx4_adev_devices) - 1; i >= 0; i--) {
> + bool is_supported = false;
> +
> + if (!priv->adev[i])
> + continue;
> +
> + if (mlx4_adev_devices[i].is_supported && !delete_all)
> + is_supported = mlx4_adev_devices[i].is_supported(dev);
> +
> + if (is_supported)
> + continue;
> +
> + del_adev(&priv->adev[i]->adev);
> + priv->adev[i] = NULL;
> + }
> +}
> +
> +/* This function is used after mlx4_dev is reconfigured.
> + */
> +static int rescan_drivers_locked(struct mlx4_dev *dev)
> +{
> + lockdep_assert_held(&intf_mutex);
> +
> + delete_drivers(dev);
> + if (!(dev->persist->interface_state & MLX4_INTERFACE_STATE_UP))
> + return 0;
> +
> + return add_drivers(dev);
> +}
> +
> int mlx4_register_device(struct mlx4_dev *dev)
> {
> struct mlx4_priv *priv = mlx4_priv(dev);
> struct mlx4_interface *intf;
> + int ret;
>
> mutex_lock(&intf_mutex);
>
> @@ -218,10 +434,18 @@ int mlx4_register_device(struct mlx4_dev *dev)
> list_for_each_entry(intf, &intf_list, list)
> mlx4_add_device(intf, priv);
>
> + ret = rescan_drivers_locked(dev);
> +
> mutex_unlock(&intf_mutex);
> +
> + if (ret) {
> + mlx4_unregister_device(dev);
> + return ret;
> + }
> +
> mlx4_start_catas_poll(dev);
>
> - return 0;
> + return ret;
> }
>
> void mlx4_unregister_device(struct mlx4_dev *dev)
> @@ -253,6 +477,8 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
> list_del(&priv->dev_list);
> dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP;
>
> + rescan_drivers_locked(dev);
> +
> mutex_unlock(&intf_mutex);
> }
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
> index 0ed490b99163..c4ec7377aa71 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/main.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/main.c
> @@ -3429,6 +3429,10 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
> INIT_LIST_HEAD(&priv->ctx_list);
> spin_lock_init(&priv->ctx_lock);
>
> + err = mlx4_adev_init(dev);
> + if (err)
> + return err;
> +
> ATOMIC_INIT_NOTIFIER_HEAD(&priv->event_nh);
>
> mutex_init(&priv->port_mutex);
> @@ -3455,10 +3459,11 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
> err = mlx4_get_ownership(dev);
> if (err) {
> if (err < 0)
> - return err;
> + goto err_adev;
> else {
> mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
> - return -EINVAL;
> + err = -EINVAL;
> + goto err_adev;
> }
> }
>
> @@ -3806,6 +3811,9 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
> mlx4_free_ownership(dev);
>
> kfree(dev_cap);
> +
> +err_adev:
> + mlx4_adev_cleanup(dev);
> return err;
> }
>
> @@ -4186,6 +4194,8 @@ static void mlx4_unload_one(struct pci_dev *pdev)
> mlx4_slave_destroy_special_qp_cap(dev);
> kfree(dev->dev_vfs);
>
> + mlx4_adev_cleanup(dev);
> +
> mlx4_clean_dev(dev);
> priv->pci_dev_data = pci_dev_data;
> priv->removed = 1;
> @@ -4573,6 +4583,9 @@ static int __init mlx4_init(void)
> {
> int ret;
>
> + WARN_ONCE(strcmp(MLX4_ADEV_NAME, KBUILD_MODNAME),
> + "mlx4_core name not in sync with kernel module name");
> +
> if (mlx4_verify_params())
> return -EINVAL;
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> index ece9acb6a869..d5050bfb342f 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> @@ -47,6 +47,7 @@
> #include <linux/spinlock.h>
> #include <net/devlink.h>
> #include <linux/rwsem.h>
> +#include <linux/auxiliary_bus.h>
> #include <linux/notifier.h>
>
> #include <linux/mlx4/device.h>
> @@ -884,6 +885,8 @@ struct mlx4_priv {
> struct list_head dev_list;
> struct list_head ctx_list;
> spinlock_t ctx_lock;
> + struct mlx4_adev **adev;
> + int adev_idx;
> struct atomic_notifier_head event_nh;
>
> int pci_dev_data;
> @@ -1052,6 +1055,9 @@ void mlx4_catas_end(struct mlx4_dev *dev);
> int mlx4_crdump_init(struct mlx4_dev *dev);
> void mlx4_crdump_end(struct mlx4_dev *dev);
> int mlx4_restart_one(struct pci_dev *pdev);
> +
> +int mlx4_adev_init(struct mlx4_dev *dev);
> +void mlx4_adev_cleanup(struct mlx4_dev *dev);
> int mlx4_register_device(struct mlx4_dev *dev);
> void mlx4_unregister_device(struct mlx4_dev *dev);
> void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
> diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
> index 049d8a4b044d..27f42f713c89 100644
> --- a/include/linux/mlx4/device.h
> +++ b/include/linux/mlx4/device.h
> @@ -33,6 +33,7 @@
> #ifndef MLX4_DEVICE_H
> #define MLX4_DEVICE_H
>
> +#include <linux/auxiliary_bus.h>
> #include <linux/if_ether.h>
> #include <linux/pci.h>
> #include <linux/completion.h>
> @@ -889,6 +890,12 @@ struct mlx4_dev {
> u8 uar_page_shift;
> };
>
> +struct mlx4_adev {
> + struct auxiliary_device adev;
> + struct mlx4_dev *mdev;
> + int idx;
> +};
> +
> struct mlx4_clock_params {
> u64 offset;
> u8 bar;
> diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
> index 781d5a0c2faa..9cf157d381c6 100644
> --- a/include/linux/mlx4/driver.h
> +++ b/include/linux/mlx4/driver.h
> @@ -34,9 +34,12 @@
> #define MLX4_DRIVER_H
>
> #include <net/devlink.h>
> +#include <linux/auxiliary_bus.h>
> #include <linux/notifier.h>
> #include <linux/mlx4/device.h>
>
> +#define MLX4_ADEV_NAME "mlx4_core"
> +
> struct mlx4_dev;
>
> #define MLX4_MAC_MASK 0xffffffffffffULL
> @@ -63,8 +66,16 @@ struct mlx4_interface {
> int flags;
> };
>
> +struct mlx4_adrv {
> + struct auxiliary_driver adrv;
> + enum mlx4_protocol protocol;
> + int flags;
> +};
> +
> int mlx4_register_interface(struct mlx4_interface *intf);
> void mlx4_unregister_interface(struct mlx4_interface *intf);
> +int mlx4_register_auxiliary_driver(struct mlx4_adrv *madrv);
> +void mlx4_unregister_auxiliary_driver(struct mlx4_adrv *madrv);
>
> int mlx4_register_event_notifier(struct mlx4_dev *dev,
> struct notifier_block *nb);
On 8/6/23 05:16, Zhu Yanjun wrote:
> 在 2023/8/4 23:05, Petr Pavlu 写道:
>> Add an auxiliary virtual bus to model the mlx4 driver structure. The
>> code is added along the current custom device management logic.
>> Subsequent patches switch mlx4_en and mlx4_ib to the auxiliary bus and
>> the old interface is then removed.
>>
>> Structure mlx4_priv gains a new adev dynamic array to keep track of its
>> auxiliary devices. Access to the array is protected by the global
>> mlx4_intf mutex.
>>
>> Functions mlx4_register_device() and mlx4_unregister_device() are
>> updated to expose auxiliary devices on the bus in order to load mlx4_en
>> and/or mlx4_ib. Functions mlx4_register_auxiliary_driver() and
>> mlx4_unregister_auxiliary_driver() are added to substitute
>> mlx4_register_interface() and mlx4_unregister_interface(), respectively.
>> Function mlx4_do_bond() is adjusted to walk over the adev array and
>> re-adds a specific auxiliary device if its driver sets the
>> MLX4_INTFF_BONDING flag.
>>
>> Signed-off-by: Petr Pavlu <[email protected]>
>> Tested-by: Leon Romanovsky <[email protected]>
>> ---
>> drivers/net/ethernet/mellanox/mlx4/Kconfig | 1 +
>> drivers/net/ethernet/mellanox/mlx4/intf.c | 230 ++++++++++++++++++++-
>> drivers/net/ethernet/mellanox/mlx4/main.c | 17 +-
>> drivers/net/ethernet/mellanox/mlx4/mlx4.h | 6 +
>> include/linux/mlx4/device.h | 7 +
>> include/linux/mlx4/driver.h | 11 +
>> 6 files changed, 268 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
>> index 1b4b1f642317..825e05fb8607 100644
>> --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig
>> +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
>> @@ -27,6 +27,7 @@ config MLX4_EN_DCB
>> config MLX4_CORE
>> tristate
>> depends on PCI
>> + select AUXILIARY_BUS
>> select NET_DEVLINK
>> default n
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c
>> index 30aead34ce08..4b1e18e4a682 100644
>> --- a/drivers/net/ethernet/mellanox/mlx4/intf.c
>> +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
>> @@ -48,6 +48,89 @@ struct mlx4_device_context {
>> static LIST_HEAD(intf_list);
>> static LIST_HEAD(dev_list);
>> static DEFINE_MUTEX(intf_mutex);
>> +static DEFINE_IDA(mlx4_adev_ida);
>> +
>> +static const struct mlx4_adev_device {
>> + const char *suffix;
>> + bool (*is_supported)(struct mlx4_dev *dev);
>> +} mlx4_adev_devices[1] = {};
>> +
>> +int mlx4_adev_init(struct mlx4_dev *dev)
>> +{
>> + struct mlx4_priv *priv = mlx4_priv(dev);
>> +
>> + priv->adev_idx = ida_alloc(&mlx4_adev_ida, GFP_KERNEL);
>> + if (priv->adev_idx < 0)
>> + return priv->adev_idx;
>> +
>> + priv->adev = kcalloc(ARRAY_SIZE(mlx4_adev_devices),
>> + sizeof(struct mlx4_adev *), GFP_KERNEL);
>> + if (!priv->adev) {
>> + ida_free(&mlx4_adev_ida, priv->adev_idx);
>> + return -ENOMEM;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +void mlx4_adev_cleanup(struct mlx4_dev *dev)
>> +{
>> + struct mlx4_priv *priv = mlx4_priv(dev);
>> +
>> + kfree(priv->adev);
>> + ida_free(&mlx4_adev_ida, priv->adev_idx);
>> +}
>> +
>> +static void adev_release(struct device *dev)
>> +{
>> + struct mlx4_adev *mlx4_adev =
>> + container_of(dev, struct mlx4_adev, adev.dev);
>> + struct mlx4_priv *priv = mlx4_priv(mlx4_adev->mdev);
>> + int idx = mlx4_adev->idx;
>> +
>> + kfree(mlx4_adev);
>> + priv->adev[idx] = NULL;
>> +}
>> +
>> +static struct mlx4_adev *add_adev(struct mlx4_dev *dev, int idx)
>> +{
>> + struct mlx4_priv *priv = mlx4_priv(dev);
>> + const char *suffix = mlx4_adev_devices[idx].suffix;
>> + struct auxiliary_device *adev;
>> + struct mlx4_adev *madev;
>> + int ret;
>> +
>> + madev = kzalloc(sizeof(*madev), GFP_KERNEL);
>> + if (!madev)
>> + return ERR_PTR(-ENOMEM);
>> +
>> + adev = &madev->adev;
>> + adev->id = priv->adev_idx;
>> + adev->name = suffix;
>> + adev->dev.parent = &dev->persist->pdev->dev;
>> + adev->dev.release = adev_release;
>> + madev->mdev = dev;
>> + madev->idx = idx;
>> +
>> + ret = auxiliary_device_init(adev);
>> + if (ret) {
>> + kfree(madev);
>> + return ERR_PTR(ret);
>> + }
>> +
>> + ret = auxiliary_device_add(adev);
>> + if (ret) {
>
> madev is allocated, but it is not handled here when auxiliary_device_add
> error. It should be freed, too?
> That is, add "kfree(madev);" here?
>
> If madev will be handled in other place, please add some comments here
> to indicate madev is handled in other place.
A successful call to auxiliary_device_init() registers the device's
.release callback. The madev storage is freed by calling
auxiliary_device_uninit() which invokes adev_release() -> kfree().
Thanks,
Petr
On Fri, Aug 04, 2023 at 05:05:24PM +0200, Petr Pavlu wrote:
> Add an auxiliary virtual bus to model the mlx4 driver structure. The
> code is added along the current custom device management logic.
> Subsequent patches switch mlx4_en and mlx4_ib to the auxiliary bus and
> the old interface is then removed.
>
> Structure mlx4_priv gains a new adev dynamic array to keep track of its
> auxiliary devices. Access to the array is protected by the global
> mlx4_intf mutex.
>
> Functions mlx4_register_device() and mlx4_unregister_device() are
> updated to expose auxiliary devices on the bus in order to load mlx4_en
> and/or mlx4_ib. Functions mlx4_register_auxiliary_driver() and
> mlx4_unregister_auxiliary_driver() are added to substitute
> mlx4_register_interface() and mlx4_unregister_interface(), respectively.
> Function mlx4_do_bond() is adjusted to walk over the adev array and
> re-adds a specific auxiliary device if its driver sets the
> MLX4_INTFF_BONDING flag.
>
> Signed-off-by: Petr Pavlu <[email protected]>
> Tested-by: Leon Romanovsky <[email protected]>
> ---
> drivers/net/ethernet/mellanox/mlx4/Kconfig | 1 +
> drivers/net/ethernet/mellanox/mlx4/intf.c | 230 ++++++++++++++++++++-
> drivers/net/ethernet/mellanox/mlx4/main.c | 17 +-
> drivers/net/ethernet/mellanox/mlx4/mlx4.h | 6 +
> include/linux/mlx4/device.h | 7 +
> include/linux/mlx4/driver.h | 11 +
> 6 files changed, 268 insertions(+), 4 deletions(-)
>
Thanks,
Reviewed-by: Leon Romanovsky <[email protected]>