2020-10-01 14:02:35

by Moshe Shemesh

[permalink] [raw]
Subject: [PATCH net-next 04/16] devlink: Add reload stats

Add reload stats to hold the history per reload action type and limit.

For example, the number of times fw_activate has been performed on this
device since the driver module was added or if the firmware activation
was performed with or without reset.

Add devlink notification on stats update.

Expose devlink reload stats to the user through devlink dev get command.

Examples:
$ devlink dev show
pci/0000:82:00.0:
stats:
reload_stats:
driver_reinit 2
fw_activate 1
fw_activate_no_reset 0
pci/0000:82:00.1:
stats:
reload_stats:
driver_reinit 1
fw_activate 0
fw_activate_no_reset 0

$ devlink dev show -jp
{
"dev": {
"pci/0000:82:00.0": {
"stats": {
"reload_stats": [ {
"driver_reinit": 2
},{
"fw_activate": 1
},{
"fw_activate_no_reset": 0
} ]
}
},
"pci/0000:82:00.1": {
"stats": {
"reload_stats": [ {
"driver_reinit": 1
},{
"fw_activate": 0
},{
"fw_activate_no_reset": 0
} ]
}
}
}
}

Signed-off-by: Moshe Shemesh <[email protected]>
---
RFCv5 -> v1:
- Changed the stats output structure, have 2 stats, one for local and
one for remote
- Resplit this patch and the next one by remote/local reload stast
instead of set/get reload stats
- Add helper function devlink_reload_stats_put()
RFCv4 -> RFCv5:
- Add separate reload action stats for updating on remote actions
- Protect from updating remote actions stats during reload_down()/up()
RFCv3 -> RFCv4:
- Renamed reload_actions_cnts to reload_action_stats
- Add devlink notifications on stats update
- Renamed devlink_reload_actions_implicit_actions_performed() and add
function comment in code
RFCv2 -> RFCv3:
- New patch
---
include/net/devlink.h | 7 +++
include/uapi/linux/devlink.h | 5 ++
net/core/devlink.c | 97 ++++++++++++++++++++++++++++++++++++
3 files changed, 109 insertions(+)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 43dde69086e5..0f3bd23b6c04 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -20,6 +20,9 @@
#include <uapi/linux/devlink.h>
#include <linux/xarray.h>

+#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
+ (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
+
struct devlink_ops;

struct devlink {
@@ -38,6 +41,7 @@ struct devlink {
struct list_head trap_policer_list;
const struct devlink_ops *ops;
struct xarray snapshot_ids;
+ u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
struct device *dev;
possible_net_t _net;
struct mutex lock; /* Serializes access to devlink instance specific objects such as
@@ -1470,6 +1474,9 @@ void
devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter);

bool devlink_is_reload_failed(const struct devlink *devlink);
+void devlink_remote_reload_actions_performed(struct devlink *devlink,
+ enum devlink_reload_limit limit,
+ unsigned long actions_performed);

void devlink_flash_update_begin_notify(struct devlink *devlink);
void devlink_flash_update_end_notify(struct devlink *devlink);
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index cc5dc4c07b4a..97e0137f6201 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -526,6 +526,11 @@ enum devlink_attr {
DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, /* u64 */
DEVLINK_ATTR_RELOAD_LIMIT, /* u8 */

+ DEVLINK_ATTR_DEV_STATS, /* nested */
+ DEVLINK_ATTR_RELOAD_STATS, /* nested */
+ DEVLINK_ATTR_RELOAD_STATS_ENTRY, /* nested */
+ DEVLINK_ATTR_RELOAD_STATS_VALUE, /* u32 */
+
/* add new attributes above here, update the policy in devlink.c */

__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 6de7d6aa6ed1..05516f1e4c3e 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -500,10 +500,68 @@ devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_l
return test_bit(limit, &devlink->ops->reload_limits);
}

+static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action,
+ enum devlink_reload_limit limit, u32 value)
+{
+ struct nlattr *reload_stats_entry;
+
+ reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
+ if (!reload_stats_entry)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_LIMIT, limit))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
+ goto nla_put_failure;
+ nla_nest_end(msg, reload_stats_entry);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, reload_stats_entry);
+ return -EMSGSIZE;
+}
+
+static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink)
+{
+ struct nlattr *reload_stats_attr;
+ int i, j, stat_idx;
+ u32 value;
+
+ reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
+
+ if (!reload_stats_attr)
+ return -EMSGSIZE;
+
+ for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
+ if (j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
+ !devlink_reload_limit_is_supported(devlink, j))
+ continue;
+ for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
+ if (!devlink_reload_action_is_supported(devlink, i) ||
+ devlink_reload_combination_is_invalid(i, j))
+ continue;
+
+ stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
+ value = devlink->reload_stats[stat_idx];
+ if (devlink_reload_stat_put(msg, i, j, value))
+ goto nla_put_failure;
+ }
+ }
+ nla_nest_end(msg, reload_stats_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, reload_stats_attr);
+ return -EMSGSIZE;
+}
+
static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
enum devlink_command cmd, u32 portid,
u32 seq, int flags)
{
+ struct nlattr *dev_stats;
void *hdr;

hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -515,9 +573,19 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
goto nla_put_failure;

+ dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS);
+ if (!dev_stats)
+ goto nla_put_failure;
+
+ if (devlink_reload_stats_put(msg, devlink))
+ goto dev_stats_nest_cancel;
+
+ nla_nest_end(msg, dev_stats);
genlmsg_end(msg, hdr);
return 0;

+dev_stats_nest_cancel:
+ nla_nest_cancel(msg, dev_stats);
nla_put_failure:
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
@@ -3004,6 +3072,34 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
}
EXPORT_SYMBOL_GPL(devlink_is_reload_failed);

+static void
+__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
+ enum devlink_reload_limit limit, unsigned long actions_performed)
+{
+ int stat_idx;
+ int action;
+
+ if (!actions_performed)
+ return;
+
+ if (WARN_ON(limit > DEVLINK_RELOAD_LIMIT_MAX))
+ return;
+ for (action = 0; action <= DEVLINK_RELOAD_ACTION_MAX; action++) {
+ if (!test_bit(action, &actions_performed))
+ continue;
+ stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
+ reload_stats[stat_idx]++;
+ }
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+static void
+devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit,
+ unsigned long actions_performed)
+{
+ __devlink_reload_stats_update(devlink, devlink->reload_stats, limit, actions_performed);
+}
+
static int devlink_reload(struct devlink *devlink, struct net *dest_net,
enum devlink_reload_action action, enum devlink_reload_limit limit,
struct netlink_ext_ack *extack, unsigned long *actions_performed)
@@ -3026,6 +3122,7 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
return err;

WARN_ON(!test_bit(action, actions_performed));
+ devlink_reload_stats_update(devlink, limit, *actions_performed);
return 0;
}

--
2.18.2


2020-10-01 21:27:53

by Jakub Kicinski

[permalink] [raw]
Subject: Re: [PATCH net-next 04/16] devlink: Add reload stats

On Thu, 1 Oct 2020 16:59:07 +0300 Moshe Shemesh wrote:
> Add reload stats to hold the history per reload action type and limit.
>
> For example, the number of times fw_activate has been performed on this
> device since the driver module was added or if the firmware activation
> was performed with or without reset.
>
> Add devlink notification on stats update.
>
> Expose devlink reload stats to the user through devlink dev get command.
>
> Examples:
> $ devlink dev show
> pci/0000:82:00.0:
> stats:
> reload_stats:
> driver_reinit 2
> fw_activate 1
> fw_activate_no_reset 0
> pci/0000:82:00.1:
> stats:
> reload_stats:
> driver_reinit 1
> fw_activate 0
> fw_activate_no_reset 0
>
> $ devlink dev show -jp
> {
> "dev": {
> "pci/0000:82:00.0": {
> "stats": {
> "reload_stats": [ {
> "driver_reinit": 2
> },{
> "fw_activate": 1
> },{
> "fw_activate_no_reset": 0
> } ]
> }
> },
> "pci/0000:82:00.1": {
> "stats": {
> "reload_stats": [ {
> "driver_reinit": 1
> },{
> "fw_activate": 0
> },{
> "fw_activate_no_reset": 0
> } ]

This will be a question to the user space part but IDK why every stat
is in a separate object?

Instead of doing an array of objects -> [ {}, {}, {} ]
make "reload_stats" itself an object.

> }
> }
> }
> }
>
> Signed-off-by: Moshe Shemesh <[email protected]>

Minor nits, looks good overall:

Reviewed-by: Jakub Kicinski <[email protected]>

> diff --git a/net/core/devlink.c b/net/core/devlink.c
> index 6de7d6aa6ed1..05516f1e4c3e 100644
> --- a/net/core/devlink.c
> +++ b/net/core/devlink.c
> @@ -500,10 +500,68 @@ devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_l
> return test_bit(limit, &devlink->ops->reload_limits);
> }
>
> +static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action,
> + enum devlink_reload_limit limit, u32 value)
> +{
> + struct nlattr *reload_stats_entry;
> +
> + reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
> + if (!reload_stats_entry)
> + return -EMSGSIZE;
> +
> + if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action))
> + goto nla_put_failure;
> + if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_LIMIT, limit))
> + goto nla_put_failure;
> + if (nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
> + goto nla_put_failure;

nit: it's common to combine such expressions into:

if (nla_put...() ||
nla_put...() ||
nla_put...())
goto ...;

> + nla_nest_end(msg, reload_stats_entry);
> + return 0;
> +
> +nla_put_failure:
> + nla_nest_cancel(msg, reload_stats_entry);
> + return -EMSGSIZE;
> +}
> +
> +static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink)
> +{
> + struct nlattr *reload_stats_attr;
> + int i, j, stat_idx;
> + u32 value;
> +
> + reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
> +
> + if (!reload_stats_attr)
> + return -EMSGSIZE;
> +
> + for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
> + if (j != DEVLINK_RELOAD_LIMIT_UNSPEC &&

Why check this? It can't be supported.

> + !devlink_reload_limit_is_supported(devlink, j))
> + continue;
> + for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
> + if (!devlink_reload_action_is_supported(devlink, i) ||
> + devlink_reload_combination_is_invalid(i, j))

Again, devlink instance would not have been registered with invalid
combinations, right?

> + continue;
> +
> + stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
> + value = devlink->reload_stats[stat_idx];
> + if (devlink_reload_stat_put(msg, i, j, value))
> + goto nla_put_failure;
> + }
> + }
> + nla_nest_end(msg, reload_stats_attr);
> + return 0;
> +
> +nla_put_failure:
> + nla_nest_cancel(msg, reload_stats_attr);
> + return -EMSGSIZE;
> +}


> @@ -3004,6 +3072,34 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
> }
> EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
>
> +static void
> +__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
> + enum devlink_reload_limit limit, unsigned long actions_performed)

> + for (action = 0; action <= DEVLINK_RELOAD_ACTION_MAX; action++) {

nit: for_each_set_bit

> + if (!test_bit(action, &actions_performed))
> + continue;
> + stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
> + reload_stats[stat_idx]++;
> + }
> + devlink_notify(devlink, DEVLINK_CMD_NEW);
> +}

2020-10-02 15:11:32

by Moshe Shemesh

[permalink] [raw]
Subject: Re: [PATCH net-next 04/16] devlink: Add reload stats


On 10/2/2020 12:25 AM, Jakub Kicinski wrote:
> External email: Use caution opening links or attachments
>
>
> On Thu, 1 Oct 2020 16:59:07 +0300 Moshe Shemesh wrote:
>> Add reload stats to hold the history per reload action type and limit.
>>
>> For example, the number of times fw_activate has been performed on this
>> device since the driver module was added or if the firmware activation
>> was performed with or without reset.
>>
>> Add devlink notification on stats update.
>>
>> Expose devlink reload stats to the user through devlink dev get command.
>>
>> Examples:
>> $ devlink dev show
>> pci/0000:82:00.0:
>> stats:
>> reload_stats:
>> driver_reinit 2
>> fw_activate 1
>> fw_activate_no_reset 0
>> pci/0000:82:00.1:
>> stats:
>> reload_stats:
>> driver_reinit 1
>> fw_activate 0
>> fw_activate_no_reset 0
>>
>> $ devlink dev show -jp
>> {
>> "dev": {
>> "pci/0000:82:00.0": {
>> "stats": {
>> "reload_stats": [ {
>> "driver_reinit": 2
>> },{
>> "fw_activate": 1
>> },{
>> "fw_activate_no_reset": 0
>> } ]
>> }
>> },
>> "pci/0000:82:00.1": {
>> "stats": {
>> "reload_stats": [ {
>> "driver_reinit": 1
>> },{
>> "fw_activate": 0
>> },{
>> "fw_activate_no_reset": 0
>> } ]
> This will be a question to the user space part but IDK why every stat
> is in a separate object?
>
> Instead of doing an array of objects -> [ {}, {}, {} ]
> make "reload_stats" itself an object.


I first thought that there should be an array of stats, looking at it
again, as long as each stat contains just sting and value, object of
pairs will perfectly fit. I will change it.

>> }
>> }
>> }
>> }
>>
>> Signed-off-by: Moshe Shemesh <[email protected]>
> Minor nits, looks good overall:
>
> Reviewed-by: Jakub Kicinski <[email protected]>
>
>> diff --git a/net/core/devlink.c b/net/core/devlink.c
>> index 6de7d6aa6ed1..05516f1e4c3e 100644
>> --- a/net/core/devlink.c
>> +++ b/net/core/devlink.c
>> @@ -500,10 +500,68 @@ devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_l
>> return test_bit(limit, &devlink->ops->reload_limits);
>> }
>>
>> +static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action,
>> + enum devlink_reload_limit limit, u32 value)
>> +{
>> + struct nlattr *reload_stats_entry;
>> +
>> + reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
>> + if (!reload_stats_entry)
>> + return -EMSGSIZE;
>> +
>> + if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action))
>> + goto nla_put_failure;
>> + if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_LIMIT, limit))
>> + goto nla_put_failure;
>> + if (nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
>> + goto nla_put_failure;
> nit: it's common to combine such expressions into:
>
> if (nla_put...() ||
> nla_put...() ||
> nla_put...())
> goto ...;


Ack.

>> + nla_nest_end(msg, reload_stats_entry);
>> + return 0;
>> +
>> +nla_put_failure:
>> + nla_nest_cancel(msg, reload_stats_entry);
>> + return -EMSGSIZE;
>> +}
>> +
>> +static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink)
>> +{
>> + struct nlattr *reload_stats_attr;
>> + int i, j, stat_idx;
>> + u32 value;
>> +
>> + reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
>> +
>> + if (!reload_stats_attr)
>> + return -EMSGSIZE;
>> +
>> + for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
>> + if (j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
> Why check this? It can't be supported.
Show stats of the actions with unspecified limit.
>> + !devlink_reload_limit_is_supported(devlink, j))
>> + continue;
>> + for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
>> + if (!devlink_reload_action_is_supported(devlink, i) ||
>> + devlink_reload_combination_is_invalid(i, j))
> Again, devlink instance would not have been registered with invalid
> combinations, right?


It does register actions and register limits, there can be invalid
combination, but devlink will block it before it gets to the driver.

>> + continue;
>> +
>> + stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
>> + value = devlink->reload_stats[stat_idx];
>> + if (devlink_reload_stat_put(msg, i, j, value))
>> + goto nla_put_failure;
>> + }
>> + }
>> + nla_nest_end(msg, reload_stats_attr);
>> + return 0;
>> +
>> +nla_put_failure:
>> + nla_nest_cancel(msg, reload_stats_attr);
>> + return -EMSGSIZE;
>> +}
>
>> @@ -3004,6 +3072,34 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
>> }
>> EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
>>
>> +static void
>> +__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
>> + enum devlink_reload_limit limit, unsigned long actions_performed)
>> + for (action = 0; action <= DEVLINK_RELOAD_ACTION_MAX; action++) {
> nit: for_each_set_bit
Ack.
>> + if (!test_bit(action, &actions_performed))
>> + continue;
>> + stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
>> + reload_stats[stat_idx]++;
>> + }
>> + devlink_notify(devlink, DEVLINK_CMD_NEW);
>> +}

2020-10-03 09:01:41

by Jiri Pirko

[permalink] [raw]
Subject: Re: [PATCH net-next 04/16] devlink: Add reload stats

Thu, Oct 01, 2020 at 03:59:07PM CEST, [email protected] wrote:
>Add reload stats to hold the history per reload action type and limit.
>
>For example, the number of times fw_activate has been performed on this
>device since the driver module was added or if the firmware activation
>was performed with or without reset.
>
>Add devlink notification on stats update.
>
>Expose devlink reload stats to the user through devlink dev get command.
>
>Examples:
>$ devlink dev show
>pci/0000:82:00.0:
> stats:
> reload_stats:
> driver_reinit 2
> fw_activate 1
> fw_activate_no_reset 0
>pci/0000:82:00.1:
> stats:
> reload_stats:
> driver_reinit 1
> fw_activate 0
> fw_activate_no_reset 0
>
>$ devlink dev show -jp
>{
> "dev": {
> "pci/0000:82:00.0": {
> "stats": {
> "reload_stats": [ {

Just "reload". No need to repeat "stats" here.


> "driver_reinit": 2
> },{
> "fw_activate": 1
> },{
> "fw_activate_no_reset": 0
> } ]
> }
> },
> "pci/0000:82:00.1": {
> "stats": {
> "reload_stats": [ {
> "driver_reinit": 1
> },{
> "fw_activate": 0
> },{
> "fw_activate_no_reset": 0
> } ]
> }
> }
> }
>}
>
>Signed-off-by: Moshe Shemesh <[email protected]>
>---
>RFCv5 -> v1:
>- Changed the stats output structure, have 2 stats, one for local and
>one for remote
>- Resplit this patch and the next one by remote/local reload stast
>instead of set/get reload stats
>- Add helper function devlink_reload_stats_put()
>RFCv4 -> RFCv5:
>- Add separate reload action stats for updating on remote actions
>- Protect from updating remote actions stats during reload_down()/up()
>RFCv3 -> RFCv4:
>- Renamed reload_actions_cnts to reload_action_stats
>- Add devlink notifications on stats update
>- Renamed devlink_reload_actions_implicit_actions_performed() and add
> function comment in code
>RFCv2 -> RFCv3:
>- New patch
>---
> include/net/devlink.h | 7 +++
> include/uapi/linux/devlink.h | 5 ++
> net/core/devlink.c | 97 ++++++++++++++++++++++++++++++++++++
> 3 files changed, 109 insertions(+)
>
>diff --git a/include/net/devlink.h b/include/net/devlink.h
>index 43dde69086e5..0f3bd23b6c04 100644
>--- a/include/net/devlink.h
>+++ b/include/net/devlink.h
>@@ -20,6 +20,9 @@
> #include <uapi/linux/devlink.h>
> #include <linux/xarray.h>
>
>+#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
>+ (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
>+
> struct devlink_ops;
>
> struct devlink {
>@@ -38,6 +41,7 @@ struct devlink {
> struct list_head trap_policer_list;
> const struct devlink_ops *ops;
> struct xarray snapshot_ids;
>+ u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
> struct device *dev;
> possible_net_t _net;
> struct mutex lock; /* Serializes access to devlink instance specific objects such as
>@@ -1470,6 +1474,9 @@ void
> devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter);
>
> bool devlink_is_reload_failed(const struct devlink *devlink);
>+void devlink_remote_reload_actions_performed(struct devlink *devlink,
>+ enum devlink_reload_limit limit,
>+ unsigned long actions_performed);

Leftover, please remove/move.


>
> void devlink_flash_update_begin_notify(struct devlink *devlink);
> void devlink_flash_update_end_notify(struct devlink *devlink);
>diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
>index cc5dc4c07b4a..97e0137f6201 100644
>--- a/include/uapi/linux/devlink.h
>+++ b/include/uapi/linux/devlink.h
>@@ -526,6 +526,11 @@ enum devlink_attr {
> DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, /* u64 */
> DEVLINK_ATTR_RELOAD_LIMIT, /* u8 */
>
>+ DEVLINK_ATTR_DEV_STATS, /* nested */
>+ DEVLINK_ATTR_RELOAD_STATS, /* nested */
>+ DEVLINK_ATTR_RELOAD_STATS_ENTRY, /* nested */
>+ DEVLINK_ATTR_RELOAD_STATS_VALUE, /* u32 */
>+
> /* add new attributes above here, update the policy in devlink.c */
>
> __DEVLINK_ATTR_MAX,
>diff --git a/net/core/devlink.c b/net/core/devlink.c
>index 6de7d6aa6ed1..05516f1e4c3e 100644
>--- a/net/core/devlink.c
>+++ b/net/core/devlink.c
>@@ -500,10 +500,68 @@ devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_l
> return test_bit(limit, &devlink->ops->reload_limits);
> }
>
>+static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action,
>+ enum devlink_reload_limit limit, u32 value)
>+{
>+ struct nlattr *reload_stats_entry;
>+
>+ reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
>+ if (!reload_stats_entry)
>+ return -EMSGSIZE;
>+
>+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action))
>+ goto nla_put_failure;
>+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_LIMIT, limit))
>+ goto nla_put_failure;
>+ if (nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
>+ goto nla_put_failure;
>+ nla_nest_end(msg, reload_stats_entry);
>+ return 0;
>+
>+nla_put_failure:
>+ nla_nest_cancel(msg, reload_stats_entry);
>+ return -EMSGSIZE;
>+}
>+
>+static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink)
>+{
>+ struct nlattr *reload_stats_attr;
>+ int i, j, stat_idx;
>+ u32 value;
>+
>+ reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
>+
>+ if (!reload_stats_attr)
>+ return -EMSGSIZE;
>+
>+ for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
>+ if (j != DEVLINK_RELOAD_LIMIT_UNSPEC &&

You should check limit_unspec during driver register, not here.


>+ !devlink_reload_limit_is_supported(devlink, j))
>+ continue;
>+ for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
>+ if (!devlink_reload_action_is_supported(devlink, i) ||
>+ devlink_reload_combination_is_invalid(i, j))
>+ continue;
>+
>+ stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
>+ value = devlink->reload_stats[stat_idx];
>+ if (devlink_reload_stat_put(msg, i, j, value))
>+ goto nla_put_failure;
>+ }
>+ }
>+ nla_nest_end(msg, reload_stats_attr);
>+ return 0;
>+
>+nla_put_failure:
>+ nla_nest_cancel(msg, reload_stats_attr);
>+ return -EMSGSIZE;
>+}
>+
> static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
> enum devlink_command cmd, u32 portid,
> u32 seq, int flags)
> {
>+ struct nlattr *dev_stats;
> void *hdr;
>
> hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
>@@ -515,9 +573,19 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
> if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
> goto nla_put_failure;
>
>+ dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS);

Avoid the "DEV". Just "DEVLINK_ATTR_STATS" is enough.


>+ if (!dev_stats)
>+ goto nla_put_failure;
>+
>+ if (devlink_reload_stats_put(msg, devlink))
>+ goto dev_stats_nest_cancel;
>+
>+ nla_nest_end(msg, dev_stats);
> genlmsg_end(msg, hdr);
> return 0;
>
>+dev_stats_nest_cancel:
>+ nla_nest_cancel(msg, dev_stats);
> nla_put_failure:
> genlmsg_cancel(msg, hdr);
> return -EMSGSIZE;
>@@ -3004,6 +3072,34 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
> }
> EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
>
>+static void
>+__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
>+ enum devlink_reload_limit limit, unsigned long actions_performed)
>+{
>+ int stat_idx;
>+ int action;
>+
>+ if (!actions_performed)
>+ return;
>+
>+ if (WARN_ON(limit > DEVLINK_RELOAD_LIMIT_MAX))

I don't understand the reason for this check and warn on. You should
sanitize this in the caller (I think you already do that).

>+ return;
>+ for (action = 0; action <= DEVLINK_RELOAD_ACTION_MAX; action++) {
>+ if (!test_bit(action, &actions_performed))
>+ continue;
>+ stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
>+ reload_stats[stat_idx]++;
>+ }
>+ devlink_notify(devlink, DEVLINK_CMD_NEW);
>+}
>+
>+static void
>+devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit,
>+ unsigned long actions_performed)
>+{
>+ __devlink_reload_stats_update(devlink, devlink->reload_stats, limit, actions_performed);
>+}
>+
> static int devlink_reload(struct devlink *devlink, struct net *dest_net,
> enum devlink_reload_action action, enum devlink_reload_limit limit,
> struct netlink_ext_ack *extack, unsigned long *actions_performed)
>@@ -3026,6 +3122,7 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
> return err;
>
> WARN_ON(!test_bit(action, actions_performed));
>+ devlink_reload_stats_update(devlink, limit, *actions_performed);
> return 0;
> }
>
>--
>2.18.2
>

2020-10-04 07:00:37

by Moshe Shemesh

[permalink] [raw]
Subject: Re: [PATCH net-next 04/16] devlink: Add reload stats


On 10/3/2020 12:00 PM, Jiri Pirko wrote:
> Thu, Oct 01, 2020 at 03:59:07PM CEST, [email protected] wrote:
>> Add reload stats to hold the history per reload action type and limit.
>>
>> For example, the number of times fw_activate has been performed on this
>> device since the driver module was added or if the firmware activation
>> was performed with or without reset.
>>
>> Add devlink notification on stats update.
>>
>> Expose devlink reload stats to the user through devlink dev get command.
>>
>> Examples:
>> $ devlink dev show
>> pci/0000:82:00.0:
>> stats:
>> reload_stats:
>> driver_reinit 2
>> fw_activate 1
>> fw_activate_no_reset 0
>> pci/0000:82:00.1:
>> stats:
>> reload_stats:
>> driver_reinit 1
>> fw_activate 0
>> fw_activate_no_reset 0
>>
>> $ devlink dev show -jp
>> {
>> "dev": {
>> "pci/0000:82:00.0": {
>> "stats": {
>> "reload_stats": [ {
> Just "reload". No need to repeat "stats" here.
Ack.
>
>> "driver_reinit": 2
>> },{
>> "fw_activate": 1
>> },{
>> "fw_activate_no_reset": 0
>> } ]
>> }
>> },
>> "pci/0000:82:00.1": {
>> "stats": {
>> "reload_stats": [ {
>> "driver_reinit": 1
>> },{
>> "fw_activate": 0
>> },{
>> "fw_activate_no_reset": 0
>> } ]
>> }
>> }
>> }
>> }
>>
>> Signed-off-by: Moshe Shemesh <[email protected]>
>> ---
>> RFCv5 -> v1:
>> - Changed the stats output structure, have 2 stats, one for local and
>> one for remote
>> - Resplit this patch and the next one by remote/local reload stast
>> instead of set/get reload stats
>> - Add helper function devlink_reload_stats_put()
>> RFCv4 -> RFCv5:
>> - Add separate reload action stats for updating on remote actions
>> - Protect from updating remote actions stats during reload_down()/up()
>> RFCv3 -> RFCv4:
>> - Renamed reload_actions_cnts to reload_action_stats
>> - Add devlink notifications on stats update
>> - Renamed devlink_reload_actions_implicit_actions_performed() and add
>> function comment in code
>> RFCv2 -> RFCv3:
>> - New patch
>> ---
>> include/net/devlink.h | 7 +++
>> include/uapi/linux/devlink.h | 5 ++
>> net/core/devlink.c | 97 ++++++++++++++++++++++++++++++++++++
>> 3 files changed, 109 insertions(+)
>>
>> diff --git a/include/net/devlink.h b/include/net/devlink.h
>> index 43dde69086e5..0f3bd23b6c04 100644
>> --- a/include/net/devlink.h
>> +++ b/include/net/devlink.h
>> @@ -20,6 +20,9 @@
>> #include <uapi/linux/devlink.h>
>> #include <linux/xarray.h>
>>
>> +#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
>> + (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
>> +
>> struct devlink_ops;
>>
>> struct devlink {
>> @@ -38,6 +41,7 @@ struct devlink {
>> struct list_head trap_policer_list;
>> const struct devlink_ops *ops;
>> struct xarray snapshot_ids;
>> + u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
>> struct device *dev;
>> possible_net_t _net;
>> struct mutex lock; /* Serializes access to devlink instance specific objects such as
>> @@ -1470,6 +1474,9 @@ void
>> devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter);
>>
>> bool devlink_is_reload_failed(const struct devlink *devlink);
>> +void devlink_remote_reload_actions_performed(struct devlink *devlink,
>> + enum devlink_reload_limit limit,
>> + unsigned long actions_performed);
> Leftover, please remove/move.
>

Yes, should be in the next patch, I missed it while re-splitting these
two patches.

>> void devlink_flash_update_begin_notify(struct devlink *devlink);
>> void devlink_flash_update_end_notify(struct devlink *devlink);
>> diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
>> index cc5dc4c07b4a..97e0137f6201 100644
>> --- a/include/uapi/linux/devlink.h
>> +++ b/include/uapi/linux/devlink.h
>> @@ -526,6 +526,11 @@ enum devlink_attr {
>> DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, /* u64 */
>> DEVLINK_ATTR_RELOAD_LIMIT, /* u8 */
>>
>> + DEVLINK_ATTR_DEV_STATS, /* nested */
>> + DEVLINK_ATTR_RELOAD_STATS, /* nested */
>> + DEVLINK_ATTR_RELOAD_STATS_ENTRY, /* nested */
>> + DEVLINK_ATTR_RELOAD_STATS_VALUE, /* u32 */
>> +
>> /* add new attributes above here, update the policy in devlink.c */
>>
>> __DEVLINK_ATTR_MAX,
>> diff --git a/net/core/devlink.c b/net/core/devlink.c
>> index 6de7d6aa6ed1..05516f1e4c3e 100644
>> --- a/net/core/devlink.c
>> +++ b/net/core/devlink.c
>> @@ -500,10 +500,68 @@ devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_l
>> return test_bit(limit, &devlink->ops->reload_limits);
>> }
>>
>> +static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action,
>> + enum devlink_reload_limit limit, u32 value)
>> +{
>> + struct nlattr *reload_stats_entry;
>> +
>> + reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
>> + if (!reload_stats_entry)
>> + return -EMSGSIZE;
>> +
>> + if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action))
>> + goto nla_put_failure;
>> + if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_LIMIT, limit))
>> + goto nla_put_failure;
>> + if (nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
>> + goto nla_put_failure;
>> + nla_nest_end(msg, reload_stats_entry);
>> + return 0;
>> +
>> +nla_put_failure:
>> + nla_nest_cancel(msg, reload_stats_entry);
>> + return -EMSGSIZE;
>> +}
>> +
>> +static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink)
>> +{
>> + struct nlattr *reload_stats_attr;
>> + int i, j, stat_idx;
>> + u32 value;
>> +
>> + reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
>> +
>> + if (!reload_stats_attr)
>> + return -EMSGSIZE;
>> +
>> + for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
>> + if (j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
> You should check limit_unspec during driver register, not here.


The thing is that now (change from RFCv5 to v1, see patch 3 change log)
driver does not need to register supported limits if it only supports
actions without any limitation. So I want to show stats of actions with
unspecified limit, though I don't expect driver to register it.

>
>> + !devlink_reload_limit_is_supported(devlink, j))
>> + continue;
>> + for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
>> + if (!devlink_reload_action_is_supported(devlink, i) ||
>> + devlink_reload_combination_is_invalid(i, j))
>> + continue;
>> +
>> + stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
>> + value = devlink->reload_stats[stat_idx];
>> + if (devlink_reload_stat_put(msg, i, j, value))
>> + goto nla_put_failure;
>> + }
>> + }
>> + nla_nest_end(msg, reload_stats_attr);
>> + return 0;
>> +
>> +nla_put_failure:
>> + nla_nest_cancel(msg, reload_stats_attr);
>> + return -EMSGSIZE;
>> +}
>> +
>> static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
>> enum devlink_command cmd, u32 portid,
>> u32 seq, int flags)
>> {
>> + struct nlattr *dev_stats;
>> void *hdr;
>>
>> hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
>> @@ -515,9 +573,19 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
>> if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
>> goto nla_put_failure;
>>
>> + dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS);
> Avoid the "DEV". Just "DEVLINK_ATTR_STATS" is enough.


"DEVLINK_ATTR_STATS" is already used for something else (traps), I added
DEV as its dev get command stats.

>
>> + if (!dev_stats)
>> + goto nla_put_failure;
>> +
>> + if (devlink_reload_stats_put(msg, devlink))
>> + goto dev_stats_nest_cancel;
>> +
>> + nla_nest_end(msg, dev_stats);
>> genlmsg_end(msg, hdr);
>> return 0;
>>
>> +dev_stats_nest_cancel:
>> + nla_nest_cancel(msg, dev_stats);
>> nla_put_failure:
>> genlmsg_cancel(msg, hdr);
>> return -EMSGSIZE;
>> @@ -3004,6 +3072,34 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
>> }
>> EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
>>
>> +static void
>> +__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
>> + enum devlink_reload_limit limit, unsigned long actions_performed)
>> +{
>> + int stat_idx;
>> + int action;
>> +
>> + if (!actions_performed)
>> + return;
>> +
>> + if (WARN_ON(limit > DEVLINK_RELOAD_LIMIT_MAX))
> I don't understand the reason for this check and warn on. You should
> sanitize this in the caller (I think you already do that).


That's because the next patch has a call to this function directly from
the driver. I should add this check on the caller in the next patch.

>> + return;
>> + for (action = 0; action <= DEVLINK_RELOAD_ACTION_MAX; action++) {
>> + if (!test_bit(action, &actions_performed))
>> + continue;
>> + stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
>> + reload_stats[stat_idx]++;
>> + }
>> + devlink_notify(devlink, DEVLINK_CMD_NEW);
>> +}
>> +
>> +static void
>> +devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit,
>> + unsigned long actions_performed)
>> +{
>> + __devlink_reload_stats_update(devlink, devlink->reload_stats, limit, actions_performed);
>> +}
>> +
>> static int devlink_reload(struct devlink *devlink, struct net *dest_net,
>> enum devlink_reload_action action, enum devlink_reload_limit limit,
>> struct netlink_ext_ack *extack, unsigned long *actions_performed)
>> @@ -3026,6 +3122,7 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
>> return err;
>>
>> WARN_ON(!test_bit(action, actions_performed));
>> + devlink_reload_stats_update(devlink, limit, *actions_performed);
>> return 0;
>> }
>>
>> --
>> 2.18.2
>>