On Mon, Feb 07, 2022 at 08:46:44PM -0800, Colin Foster wrote:
> Create and utilize bulk regmap reads instead of single access for gathering
> stats. The background reading of statistics happens frequently, and over
> a few contiguous memory regions.
>
> High speed PCIe buses and MMIO access will probably see negligible
> performance increase. Lower speed buses like SPI and I2C could see
> significant performance increase, since the bus configuration and register
> access times account for a large percentage of data transfer time.
>
> Signed-off-by: Colin Foster <[email protected]>
> ---
Reviewed-by: Vladimir Oltean <[email protected]>
Just one minor comment below, but all in all it's fine as is, too.
> drivers/net/ethernet/mscc/ocelot.c | 78 +++++++++++++++++++++++++-----
> include/soc/mscc/ocelot.h | 8 +++
> 2 files changed, 73 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
> index 455293aa6343..5efb1f3a1410 100644
> --- a/drivers/net/ethernet/mscc/ocelot.c
> +++ b/drivers/net/ethernet/mscc/ocelot.c
> @@ -1737,32 +1737,41 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
> }
> EXPORT_SYMBOL(ocelot_get_strings);
>
> -static void ocelot_update_stats(struct ocelot *ocelot)
> +static int ocelot_update_stats(struct ocelot *ocelot)
> {
> - int i, j;
> + struct ocelot_stats_region *region;
> + int i, j, err = 0;
>
> mutex_lock(&ocelot->stats_lock);
>
> for (i = 0; i < ocelot->num_phys_ports; i++) {
> + unsigned int idx = 0;
> +
> /* Configure the port to read the stats from */
> ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
>
> - for (j = 0; j < ocelot->num_stats; j++) {
> - u32 val;
> - unsigned int idx = i * ocelot->num_stats + j;
> + list_for_each_entry(region, &ocelot->stats_regions, node) {
> + err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> + region->offset, region->buf,
> + region->count);
> + if (err)
> + goto out;
>
> - val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> - ocelot->stats_layout[j].offset);
> + for (j = 0; j < region->count; j++) {
> + if (region->buf[j] < (ocelot->stats[idx + j] & U32_MAX))
> + ocelot->stats[idx + j] += (u64)1 << 32;
>
> - if (val < (ocelot->stats[idx] & U32_MAX))
> - ocelot->stats[idx] += (u64)1 << 32;
> + ocelot->stats[idx + j] = (ocelot->stats[idx + j] &
> + ~(u64)U32_MAX) + region->buf[j];
> + }
>
> - ocelot->stats[idx] = (ocelot->stats[idx] &
> - ~(u64)U32_MAX) + val;
> + idx += region->count;
> }
> }
>
> +out:
> mutex_unlock(&ocelot->stats_lock);
> + return err;
> }
>
> static void ocelot_check_stats_work(struct work_struct *work)
> @@ -1779,10 +1788,11 @@ static void ocelot_check_stats_work(struct work_struct *work)
>
> void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
> {
> - int i;
> + int i, err;
>
> /* check and update now */
> - ocelot_update_stats(ocelot);
> + err = ocelot_update_stats(ocelot);
> + WARN_ONCE(err, "Error %d updating ethtool stats\n", err);
I think a dev_err(ocelot->dev, ...) would be more appropriate here.
WARN_ON() should be used for truly critical errors (things that should
never happen unless bugs are present). When the assertion holds true, a
WARN_ON() will print a stack trace, and when "panic_on_warn" is enabled,
a WARN() is effectively a BUG() and crashes the kernel.
include/asm-generic/bug.h says:
/*
* WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report
* significant kernel issues that need prompt attention if they should ever
* appear at runtime.
*
* Do not use these macros when checking for invalid external inputs
* (e.g. invalid system call arguments, or invalid data coming from
* network/devices), and on transient conditions like ENOMEM or EAGAIN.
* These macros should be used for recoverable kernel issues only.
* For invalid external inputs, transient conditions, etc use
* pr_err[_once/_ratelimited]() followed by dump_stack(), if necessary.
* Do not include "BUG"/"WARNING" in format strings manually to make these
* conditions distinguishable from kernel issues.
*
* Use the versions with printk format strings to provide better diagnostics.
*/
>
> /* Copy all counters */
> for (i = 0; i < ocelot->num_stats; i++)
> @@ -1799,6 +1809,41 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
> }
> EXPORT_SYMBOL(ocelot_get_sset_count);
>
> +static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
> +{
> + struct ocelot_stats_region *region = NULL;
> + unsigned int last;
> + int i;
> +
> + INIT_LIST_HEAD(&ocelot->stats_regions);
> +
> + for (i = 0; i < ocelot->num_stats; i++) {
> + if (region && ocelot->stats_layout[i].offset == last + 1) {
> + region->count++;
> + } else {
> + region = devm_kzalloc(ocelot->dev, sizeof(*region),
> + GFP_KERNEL);
> + if (!region)
> + return -ENOMEM;
> +
> + region->offset = ocelot->stats_layout[i].offset;
> + region->count = 1;
> + list_add_tail(®ion->node, &ocelot->stats_regions);
> + }
> +
> + last = ocelot->stats_layout[i].offset;
> + }
> +
> + list_for_each_entry(region, &ocelot->stats_regions, node) {
> + region->buf = devm_kcalloc(ocelot->dev, region->count,
> + sizeof(*region->buf), GFP_KERNEL);
> + if (!region->buf)
> + return -ENOMEM;
> + }
> +
> + return 0;
> +}
> +
> int ocelot_get_ts_info(struct ocelot *ocelot, int port,
> struct ethtool_ts_info *info)
> {
> @@ -2799,6 +2844,13 @@ int ocelot_init(struct ocelot *ocelot)
> ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
> ANA_CPUQ_8021_CFG, i);
>
> + ret = ocelot_prepare_stats_regions(ocelot);
> + if (ret) {
> + destroy_workqueue(ocelot->stats_queue);
> + destroy_workqueue(ocelot->owq);
> + return ret;
> + }
> +
> INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
> queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
> OCELOT_STATS_CHECK_DELAY);
> diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
> index 312b72558659..d3291a5f7e88 100644
> --- a/include/soc/mscc/ocelot.h
> +++ b/include/soc/mscc/ocelot.h
> @@ -542,6 +542,13 @@ struct ocelot_stat_layout {
> char name[ETH_GSTRING_LEN];
> };
>
> +struct ocelot_stats_region {
> + struct list_head node;
> + u32 offset;
> + int count;
> + u32 *buf;
> +};
> +
> enum ocelot_tag_prefix {
> OCELOT_TAG_PREFIX_DISABLED = 0,
> OCELOT_TAG_PREFIX_NONE,
> @@ -673,6 +680,7 @@ struct ocelot {
> struct regmap_field *regfields[REGFIELD_MAX];
> const u32 *const *map;
> const struct ocelot_stat_layout *stats_layout;
> + struct list_head stats_regions;
> unsigned int num_stats;
>
> u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
> --
> 2.25.1
>