This series provides knobs which will enable users to
minimize memory consumption of mlx5 Functions (PF/VF/SF).
mlx5 exposes two new generic devlink params for EQ size
configuration and uses devlink generic param max_macs.
Patches summary:
- Patches-1-2 Provides I/O EQ size param which enables to save
up to 128KB.
- Patches-3-4 Provides event EQ size param which enables to save
up to 512KB.
- Patch-5 Clarify max_macs param.
- Patch-6 Provides max_macs param which enables to save up to 70KB
In total, this series can save up to 700KB per Function.
---
changelog:
v1->v2:
- convert io_eq_size and event_eq_size from devlink_resources to
generic devlink_params
Shay Drory (6):
devlink: Add new "io_eq_size" generic device param
net/mlx5: Let user configure io_eq_size param
devlink: Add new "event_eq_size" generic device param
net/mlx5: Let user configure event_eq_size param
devlink: Clarifies max_macs generic devlink param
net/mlx5: Let user configure max_macs generic param
.../networking/devlink/devlink-params.rst | 12 ++-
Documentation/networking/devlink/mlx5.rst | 10 +++
.../net/ethernet/mellanox/mlx5/core/devlink.c | 88 +++++++++++++++++++
drivers/net/ethernet/mellanox/mlx5/core/eq.c | 34 ++++++-
.../net/ethernet/mellanox/mlx5/core/main.c | 18 ++++
include/linux/mlx5/mlx5_ifc.h | 2 +-
include/net/devlink.h | 8 ++
net/core/devlink.c | 10 +++
8 files changed, 177 insertions(+), 5 deletions(-)
--
2.21.3
Add new device generic parameter to determine the size of the
asynchronous control events EQ.
For example, to reduce event EQ size to 64, execute:
$ devlink dev param set pci/0000:06:00.0 \
name event_eq_size value 64 cmode driverinit
$ devlink dev reload pci/0000:06:00.0
Signed-off-by: Shay Drory <[email protected]>
Reviewed-by: Moshe Shemesh <[email protected]>
---
Documentation/networking/devlink/devlink-params.rst | 3 +++
include/net/devlink.h | 4 ++++
net/core/devlink.c | 5 +++++
3 files changed, 12 insertions(+)
diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst
index cd9342305a13..0eddee6e66f3 100644
--- a/Documentation/networking/devlink/devlink-params.rst
+++ b/Documentation/networking/devlink/devlink-params.rst
@@ -132,3 +132,6 @@ own name.
* - ``io_eq_size``
- u16
- Control the size of I/O completion EQs.
+ * - ``event_eq_size``
+ - u16
+ - Control the size of asynchronous control events EQ.
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 61efa45b8786..99b06740a918 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -460,6 +460,7 @@ enum devlink_param_generic_id {
DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+ DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
/* add new param generic ids above here*/
__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -515,6 +516,9 @@ enum devlink_param_generic_id {
#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME "io_eq_size"
#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U16
+#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME "event_eq_size"
+#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U16
+
#define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \
{ \
.id = DEVLINK_PARAM_GENERIC_ID_##_id, \
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 0d4e63d11585..d9f3c994e704 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4471,6 +4471,11 @@ static const struct devlink_param devlink_param_generic[] = {
.name = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME,
.type = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE,
},
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
+ .name = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME,
+ .type = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE,
+ },
};
static int devlink_param_generic_verify(const struct devlink_param *param)
--
2.21.3
Add new device generic parameter to determine the size of the
I/O completion EQs.
For example, to reduce I/O EQ size to 64, execute:
$ devlink dev param set pci/0000:06:00.0 \
name io_eq_size value 64 cmode driverinit
$ devlink dev reload pci/0000:06:00.0
Signed-off-by: Shay Drory <[email protected]>
Reviewed-by: Moshe Shemesh <[email protected]>
---
Documentation/networking/devlink/devlink-params.rst | 3 +++
include/net/devlink.h | 4 ++++
net/core/devlink.c | 5 +++++
3 files changed, 12 insertions(+)
diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst
index b7dfe693a332..cd9342305a13 100644
--- a/Documentation/networking/devlink/devlink-params.rst
+++ b/Documentation/networking/devlink/devlink-params.rst
@@ -129,3 +129,6 @@ own name.
will NACK any attempt of other host to reset the device. This parameter
is useful for setups where a device is shared by different hosts, such
as multi-host setup.
+ * - ``io_eq_size``
+ - u16
+ - Control the size of I/O completion EQs.
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 3276a29f2b81..61efa45b8786 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -459,6 +459,7 @@ enum devlink_param_generic_id {
DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
+ DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
/* add new param generic ids above here*/
__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -511,6 +512,9 @@ enum devlink_param_generic_id {
#define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME "enable_iwarp"
#define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE DEVLINK_PARAM_TYPE_BOOL
+#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME "io_eq_size"
+#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U16
+
#define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \
{ \
.id = DEVLINK_PARAM_GENERIC_ID_##_id, \
diff --git a/net/core/devlink.c b/net/core/devlink.c
index db3b52110cf2..0d4e63d11585 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4466,6 +4466,11 @@ static const struct devlink_param devlink_param_generic[] = {
.name = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME,
.type = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE,
},
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+ .name = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME,
+ .type = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE,
+ },
};
static int devlink_param_generic_verify(const struct devlink_param *param)
--
2.21.3
Currently, each I/O EQ is taking 128KB of memory. This size
is not needed in all use cases, and is critical with large scale.
Hence, allow user to configure the size of I/O EQs.
For example, to reduce I/O EQ size to 64, execute:
$ devlink dev param set pci/0000:00:0b.0 name io_eq_size value 64 \
cmode driverinit
$ devlink dev reload pci/0000:00:0b.0
Signed-off-by: Shay Drory <[email protected]>
Reviewed-by: Moshe Shemesh <[email protected]>
---
Documentation/networking/devlink/mlx5.rst | 4 ++++
.../net/ethernet/mellanox/mlx5/core/devlink.c | 14 ++++++++++++++
drivers/net/ethernet/mellanox/mlx5/core/eq.c | 18 +++++++++++++++++-
3 files changed, 35 insertions(+), 1 deletion(-)
diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 4e4b97f7971a..291e7f63af73 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -14,8 +14,12 @@ Parameters
* - Name
- Mode
+ - Validation
* - ``enable_roce``
- driverinit
+ * - ``io_eq_size``
+ - driverinit
+ - The range is between 64 and 4096.
The ``mlx5`` driver also implements the following driver-specific
parameters.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 1c98652b244a..d8a705a94dcc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -546,6 +546,13 @@ static int mlx5_devlink_enable_remote_dev_reset_get(struct devlink *devlink, u32
return 0;
}
+static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ return (val.vu16 >= 64 && val.vu16 <= 4096) ? 0 : -EINVAL;
+}
+
static const struct devlink_param mlx5_devlink_params[] = {
DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE,
"flow_steering_mode", DEVLINK_PARAM_TYPE_STRING,
@@ -570,6 +577,8 @@ static const struct devlink_param mlx5_devlink_params[] = {
DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
mlx5_devlink_enable_remote_dev_reset_get,
mlx5_devlink_enable_remote_dev_reset_set, NULL),
+ DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_eq_depth_validate),
};
static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
@@ -608,6 +617,11 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
value);
}
#endif
+
+ value.vu16 = MLX5_COMP_EQ_SIZE;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+ value);
}
static const struct devlink_param enable_eth_param =
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 792e0d6aa861..230f62804b73 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -19,6 +19,7 @@
#include "lib/clock.h"
#include "diag/fw_tracer.h"
#include "mlx5_irq.h"
+#include "devlink.h"
enum {
MLX5_EQE_OWNER_INIT_VAL = 0x1,
@@ -796,6 +797,21 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
}
}
+static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+ &val);
+ if (!err)
+ return val.vu16;
+ mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
+ return MLX5_COMP_EQ_SIZE;
+}
+
static int create_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
@@ -807,7 +823,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
INIT_LIST_HEAD(&table->comp_eqs_list);
ncomp_eqs = table->num_comp_eqs;
- nent = MLX5_COMP_EQ_SIZE;
+ nent = comp_eq_depth_devlink_param_get(dev);
for (i = 0; i < ncomp_eqs; i++) {
struct mlx5_eq_param param = {};
int vecidx = i;
--
2.21.3
Event EQ is an EQ which received the notification of almost all the
events generated by the NIC.
Currently, each event EQ is taking 512KB of memory. This size is not
needed in most use cases, and is critical with large scale. Hence,
allow user to configure the size of the event EQ.
For example to reduce event EQ size to 64, execute::
$ devlink dev param set pci/0000:00:0b.0 name event_eq_size value 64 \
cmode driverinit
$ devlink dev reload pci/0000:00:0b.0
Signed-off-by: Shay Drory <[email protected]>
Reviewed-by: Moshe Shemesh <[email protected]>
---
Documentation/networking/devlink/mlx5.rst | 3 +++
.../net/ethernet/mellanox/mlx5/core/devlink.c | 7 +++++++
drivers/net/ethernet/mellanox/mlx5/core/eq.c | 16 +++++++++++++++-
3 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 291e7f63af73..38089f0aefcf 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -20,6 +20,9 @@ Parameters
* - ``io_eq_size``
- driverinit
- The range is between 64 and 4096.
+ * - ``event_eq_size``
+ - driverinit
+ - The range is between 64 and 4096.
The ``mlx5`` driver also implements the following driver-specific
parameters.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index d8a705a94dcc..31bbbb30acae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -579,6 +579,8 @@ static const struct devlink_param mlx5_devlink_params[] = {
mlx5_devlink_enable_remote_dev_reset_set, NULL),
DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
NULL, NULL, mlx5_devlink_eq_depth_validate),
+ DEVLINK_PARAM_GENERIC(EVENT_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_eq_depth_validate),
};
static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
@@ -622,6 +624,11 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
devlink_param_driverinit_value_set(devlink,
DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
value);
+
+ value.vu16 = MLX5_NUM_ASYNC_EQE;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
+ value);
}
static const struct devlink_param enable_eth_param =
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 230f62804b73..3ec140af66fd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -623,6 +623,20 @@ static void cleanup_async_eq(struct mlx5_core_dev *dev,
name, err);
}
+static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
+ &val);
+ if (!err)
+ return val.vu16;
+ mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
+ return MLX5_NUM_ASYNC_EQE;
+}
static int create_async_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
@@ -647,7 +661,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
param = (struct mlx5_eq_param) {
.irq_index = MLX5_IRQ_EQ_CTRL,
- .nent = MLX5_NUM_ASYNC_EQE,
+ .nent = async_eq_depth_devlink_param_get(dev),
};
gather_async_events_mask(dev, param.mask);
--
2.21.3
Wed, Dec 08, 2021 at 08:00:01AM CET, [email protected] wrote:
>Add new device generic parameter to determine the size of the
>I/O completion EQs.
>
>For example, to reduce I/O EQ size to 64, execute:
>$ devlink dev param set pci/0000:06:00.0 \
> name io_eq_size value 64 cmode driverinit
>$ devlink dev reload pci/0000:06:00.0
>
>Signed-off-by: Shay Drory <[email protected]>
>Reviewed-by: Moshe Shemesh <[email protected]>
>---
> Documentation/networking/devlink/devlink-params.rst | 3 +++
> include/net/devlink.h | 4 ++++
> net/core/devlink.c | 5 +++++
> 3 files changed, 12 insertions(+)
>
>diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst
>index b7dfe693a332..cd9342305a13 100644
>--- a/Documentation/networking/devlink/devlink-params.rst
>+++ b/Documentation/networking/devlink/devlink-params.rst
>@@ -129,3 +129,6 @@ own name.
> will NACK any attempt of other host to reset the device. This parameter
> is useful for setups where a device is shared by different hosts, such
> as multi-host setup.
>+ * - ``io_eq_size``
>+ - u16
Hmm, I wonder if this wouldn't be better to have it 32bits which might
be future safe.
Otherwise, this looks fine.
>+ - Control the size of I/O completion EQs.
>diff --git a/include/net/devlink.h b/include/net/devlink.h
>index 3276a29f2b81..61efa45b8786 100644
>--- a/include/net/devlink.h
>+++ b/include/net/devlink.h
>@@ -459,6 +459,7 @@ enum devlink_param_generic_id {
> DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
> DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
> DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
>+ DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
>
> /* add new param generic ids above here*/
> __DEVLINK_PARAM_GENERIC_ID_MAX,
>@@ -511,6 +512,9 @@ enum devlink_param_generic_id {
> #define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME "enable_iwarp"
> #define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE DEVLINK_PARAM_TYPE_BOOL
>
>+#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME "io_eq_size"
>+#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U16
>+
> #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \
> { \
> .id = DEVLINK_PARAM_GENERIC_ID_##_id, \
>diff --git a/net/core/devlink.c b/net/core/devlink.c
>index db3b52110cf2..0d4e63d11585 100644
>--- a/net/core/devlink.c
>+++ b/net/core/devlink.c
>@@ -4466,6 +4466,11 @@ static const struct devlink_param devlink_param_generic[] = {
> .name = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME,
> .type = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE,
> },
>+ {
>+ .id = DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
>+ .name = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME,
>+ .type = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE,
>+ },
> };
>
> static int devlink_param_generic_verify(const struct devlink_param *param)
>--
>2.21.3
>