Create and use bulk regmap reads instead of single accesses when gathering
stats. The background reading of statistics happens frequently, and over
a few contiguous memory regions.

High-speed PCIe buses and MMIO access will probably see a negligible
performance increase. Lower-speed buses like SPI and I2C could see a
significant performance increase, since bus configuration and register
access times account for a large percentage of the data transfer time.

Signed-off-by: Colin Foster <[email protected]>
---
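
Note for reviewers (illustrative only, not part of the patch): for a
contiguous region of N 32-bit counters, the bulk path amounts to a single
regmap transaction instead of N. A minimal sketch of the idea, assuming a
4-byte register stride and using the generic regmap_bulk_read() API rather
than the ocelot_bulk_read_rix() helper this patch relies on:

    #include <linux/regmap.h>
    #include <linux/types.h>

    /* Read 'count' consecutive 32-bit counters starting at 'base + offset'
     * in one regmap transaction. On SPI/I2C this means one address setup
     * plus 'count' data words, instead of a full transaction per counter
     * as with regmap_read().
     */
    static int example_bulk_read(struct regmap *map, unsigned int base,
                                 unsigned int offset, u32 *buf, size_t count)
    {
            /* Assumes 32-bit registers laid out at a 4-byte stride. */
            return regmap_bulk_read(map, base + offset * 4, buf, count);
    }

With the regions prepared once at init time (ocelot_prepare_stats_regions()
below), the per-port update loop issues one such read per contiguous region
rather than one read per counter.
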
drivers/net/ethernet/mscc/ocelot.c | 92 +++++++++++++++++++++++++-----
include/soc/mscc/ocelot.h | 8 +++
2 files changed, 85 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 885288eb389c..637fd79402e0 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1746,25 +1746,36 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
EXPORT_SYMBOL(ocelot_get_strings);
/* Caller must hold &ocelot->stats_lock */
-static void ocelot_port_update_stats(struct ocelot *ocelot, int port)
+static int ocelot_port_update_stats(struct ocelot *ocelot, int port)
{
- int j;
+ unsigned int idx = port * ocelot->num_stats;
+ struct ocelot_stats_region *region;
+ int err, j;
/* Configure the port to read the stats from */
ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(port), SYS_STAT_CFG);
- for (j = 0; j < ocelot->num_stats; j++) {
- u32 val;
- unsigned int idx = port * ocelot->num_stats + j;
+ list_for_each_entry(region, &ocelot->stats_regions, node) {
+ err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
+ region->offset, region->buf,
+ region->count);
+ if (err)
+ return err;
- val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
- ocelot->stats_layout[j].offset);
+ for (j = 0; j < region->count; j++) {
+ u64 *stat = &ocelot->stats[idx + j];
+ u64 val = region->buf[j];
- if (val < (ocelot->stats[idx] & U32_MAX))
- ocelot->stats[idx] += (u64)1 << 32;
+ if (val < (*stat & U32_MAX))
+ *stat += (u64)1 << 32;
- ocelot->stats[idx] = (ocelot->stats[idx] & ~(u64)U32_MAX) + val;
+ *stat = (*stat & ~(u64)U32_MAX) + val;
+ }
+
+ idx += region->count;
}
+
+ return err;
}
static void ocelot_check_stats_work(struct work_struct *work)
@@ -1772,31 +1783,40 @@ static void ocelot_check_stats_work(struct work_struct *work)
struct delayed_work *del_work = to_delayed_work(work);
struct ocelot *ocelot = container_of(del_work, struct ocelot,
stats_work);
- int i;
+ int i, err;
mutex_lock(&ocelot->stats_lock);
- for (i = 0; i < ocelot->num_phys_ports; i++)
- ocelot_port_update_stats(ocelot, i);
+ for (i = 0; i < ocelot->num_phys_ports; i++) {
+ err = ocelot_port_update_stats(ocelot, i);
+ if (err)
+ break;
+ }
mutex_unlock(&ocelot->stats_lock);
+ if (err)
+ dev_err(ocelot->dev, "Error %d updating ethtool stats\n", err);
+
queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
OCELOT_STATS_CHECK_DELAY);
}
void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
{
- int i;
+ int i, err;
mutex_lock(&ocelot->stats_lock);
/* check and update now */
- ocelot_port_update_stats(ocelot, port);
+ err = ocelot_port_update_stats(ocelot, port);
/* Copy all counters */
for (i = 0; i < ocelot->num_stats; i++)
*data++ = ocelot->stats[port * ocelot->num_stats + i];
mutex_unlock(&ocelot->stats_lock);
+
+ if (err)
+ dev_err(ocelot->dev, "Error %d updating ethtool stats\n", err);
}
EXPORT_SYMBOL(ocelot_get_ethtool_stats);
@@ -1809,6 +1829,41 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
}
EXPORT_SYMBOL(ocelot_get_sset_count);
+static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
+{
+ struct ocelot_stats_region *region = NULL;
+ unsigned int last;
+ int i;
+
+ INIT_LIST_HEAD(&ocelot->stats_regions);
+
+ for (i = 0; i < ocelot->num_stats; i++) {
+ if (region && ocelot->stats_layout[i].offset == last + 1) {
+ region->count++;
+ } else {
+ region = devm_kzalloc(ocelot->dev, sizeof(*region),
+ GFP_KERNEL);
+ if (!region)
+ return -ENOMEM;
+
+ region->offset = ocelot->stats_layout[i].offset;
+ region->count = 1;
+ list_add_tail(&region->node, &ocelot->stats_regions);
+ }
+
+ last = ocelot->stats_layout[i].offset;
+ }
+
+ list_for_each_entry(region, &ocelot->stats_regions, node) {
+ region->buf = devm_kcalloc(ocelot->dev, region->count,
+ sizeof(*region->buf), GFP_KERNEL);
+ if (!region->buf)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
int ocelot_get_ts_info(struct ocelot *ocelot, int port,
struct ethtool_ts_info *info)
{
@@ -2809,6 +2864,13 @@ int ocelot_init(struct ocelot *ocelot)
ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
ANA_CPUQ_8021_CFG, i);
+ ret = ocelot_prepare_stats_regions(ocelot);
+ if (ret) {
+ destroy_workqueue(ocelot->stats_queue);
+ destroy_workqueue(ocelot->owq);
+ return ret;
+ }
+
INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
OCELOT_STATS_CHECK_DELAY);
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 312b72558659..d3291a5f7e88 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -542,6 +542,13 @@ struct ocelot_stat_layout {
char name[ETH_GSTRING_LEN];
};
+struct ocelot_stats_region {
+ struct list_head node;
+ u32 offset;
+ int count;
+ u32 *buf;
+};
+
enum ocelot_tag_prefix {
OCELOT_TAG_PREFIX_DISABLED = 0,
OCELOT_TAG_PREFIX_NONE,
@@ -673,6 +680,7 @@ struct ocelot {
struct regmap_field *regfields[REGFIELD_MAX];
const u32 *const *map;
const struct ocelot_stat_layout *stats_layout;
+ struct list_head stats_regions;
unsigned int num_stats;
u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
--
2.25.1
On Sun, Feb 13, 2022 at 11:12:54AM -0800, Colin Foster wrote:
> Create and use bulk regmap reads instead of single accesses when gathering
> stats. The background reading of statistics happens frequently, and over
> a few contiguous memory regions.
>
> High-speed PCIe buses and MMIO access will probably see a negligible
> performance increase. Lower-speed buses like SPI and I2C could see a
> significant performance increase, since bus configuration and register
> access times account for a large percentage of the data transfer time.
>
> Signed-off-by: Colin Foster <[email protected]>
> ---
Reviewed-by: Vladimir Oltean <[email protected]>
Tested-by: Vladimir Oltean <[email protected]>