Ocelot loops over memory regions to gather stats on different ports.
These regions are mostly continuous, and are ordered. This patch set
uses that information to break the stats reads into regions that can get
read in bulk.
The motiviation is for general cleanup, but also for SPI. Performing two
back-to-back reads on a SPI bus require toggling the CS line, holding,
re-toggling the CS line, sending 3 address bytes, sending N padding
bytes, then actually performing the read. Bulk reads could reduce almost
all of that overhead, but require that the reads are performed via
regmap_bulk_read.
Colin Foster (2):
net: mscc: ocelot: add ability to perform bulk reads
net: mscc: ocelot: use bulk reads for stats
drivers/net/ethernet/mscc/ocelot.c | 76 ++++++++++++++++++++++-----
drivers/net/ethernet/mscc/ocelot_io.c | 13 +++++
include/soc/mscc/ocelot.h | 12 +++++
3 files changed, 88 insertions(+), 13 deletions(-)
--
2.25.1
Regmap supports bulk register reads. Ocelot does not. This patch adds
support for Ocelot to invoke bulk regmap reads.
ocelot_update_stats loops over continguous memory regions. Performing bulk
reads over these regions will allow drivers to optimize bulk optimizations
to thrive.
Specifically, SPI operations for Ocelot chips will be improved drastically.
The process of transitioning from subsequent register reads removes the
need to de-assert CS, re-assert CS, send 3 address bytes, and send N
paddings bytes. With one padding byte, that is an overhead of 100% for the
protocol alone. This overhead will no longer need to be incurred.
This patch should have minimal effect on existing systems. There is a small
memory hit (maybe 1KB) and linked-list navigation runtime performance. The
runtime penalty is probably offset by lower-level lock cleanup, so it is
entirely possible that this patch actually improves runtime performance of
existing systems.
Signed-off-by: Colin Foster <[email protected]>
---
drivers/net/ethernet/mscc/ocelot_io.c | 13 +++++++++++++
include/soc/mscc/ocelot.h | 4 ++++
2 files changed, 17 insertions(+)
diff --git a/drivers/net/ethernet/mscc/ocelot_io.c b/drivers/net/ethernet/mscc/ocelot_io.c
index 7390fa3980ec..2067382d0ee1 100644
--- a/drivers/net/ethernet/mscc/ocelot_io.c
+++ b/drivers/net/ethernet/mscc/ocelot_io.c
@@ -10,6 +10,19 @@
#include "ocelot.h"
+int __ocelot_bulk_read_ix(struct ocelot *ocelot, u32 reg, u32 offset, void *buf,
+ int count)
+{
+ u16 target = reg >> TARGET_OFFSET;
+
+ WARN_ON(!target);
+
+ return regmap_bulk_read(ocelot->targets[target],
+ ocelot->map[target][reg & REG_MASK] + offset,
+ buf, count);
+}
+EXPORT_SYMBOL_GPL(__ocelot_bulk_read_ix);
+
u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset)
{
u16 target = reg >> TARGET_OFFSET;
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 5e6b9d6c4c1b..6f74015032fc 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -744,6 +744,8 @@ struct ocelot_policer {
u32 burst; /* bytes */
};
+#define ocelot_bulk_read_rix(ocelot, reg, ri, buf, count) __ocelot_bulk_read_ix(ocelot, reg, reg##_RSZ * (ri), buf, count)
+
#define ocelot_read_ix(ocelot, reg, gi, ri) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
#define ocelot_read_gix(ocelot, reg, gi) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi))
#define ocelot_read_rix(ocelot, reg, ri) __ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri))
@@ -786,6 +788,8 @@ struct ocelot_policer {
u32 ocelot_port_readl(struct ocelot_port *port, u32 reg);
void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg);
void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg);
+int __ocelot_bulk_read_ix(struct ocelot *ocelot, u32 reg, u32 offset, void *buf,
+ int count);
u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset);
void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset);
void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
--
2.25.1
Create and utilize bulk regmap reads instead of single access for gathering
stats. This will allow slower buses (SPI) the ability to drop significant
overhead performing new transactions that could instead be sequential
access.
Signed-off-by: Colin Foster <[email protected]>
---
drivers/net/ethernet/mscc/ocelot.c | 76 +++++++++++++++++++++++++-----
include/soc/mscc/ocelot.h | 8 ++++
2 files changed, 71 insertions(+), 13 deletions(-)
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 9b42187a026a..57b40de9fcdc 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1697,32 +1697,40 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
}
EXPORT_SYMBOL(ocelot_get_strings);
-static void ocelot_update_stats(struct ocelot *ocelot)
+static int ocelot_update_stats(struct ocelot *ocelot)
{
- int i, j;
+ struct ocelot_stats_region *region;
+ int i, j, err = 0;
mutex_lock(&ocelot->stats_lock);
for (i = 0; i < ocelot->num_phys_ports; i++) {
+ unsigned int idx = 0;
/* Configure the port to read the stats from */
ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
- for (j = 0; j < ocelot->num_stats; j++) {
- u32 val;
- unsigned int idx = i * ocelot->num_stats + j;
+ list_for_each_entry(region, &ocelot->stats_regions, node) {
+ err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
+ region->offset, region->buf,
+ region->count);
+ if (err)
+ goto out;
- val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
- ocelot->stats_layout[j].offset);
+ for (j = 0; j < region->count; j++) {
+ if (region->buf[j] < (ocelot->stats[idx + j] & U32_MAX))
+ ocelot->stats[idx + j] += (u64)1 << 32;
- if (val < (ocelot->stats[idx] & U32_MAX))
- ocelot->stats[idx] += (u64)1 << 32;
+ ocelot->stats[idx + j] = (ocelot->stats[idx + j] &
+ ~(u64)U32_MAX) + region->buf[j];
+ }
- ocelot->stats[idx] = (ocelot->stats[idx] &
- ~(u64)U32_MAX) + val;
+ idx += region->count;
}
}
+out:
mutex_unlock(&ocelot->stats_lock);
+ return err;
}
static void ocelot_check_stats_work(struct work_struct *work)
@@ -1739,10 +1747,11 @@ static void ocelot_check_stats_work(struct work_struct *work)
void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
{
- int i;
+ int i, err;
/* check and update now */
- ocelot_update_stats(ocelot);
+ err = ocelot_update_stats(ocelot);
+ WARN_ONCE(err, "Error %d updating ethtool stats\n", err);
/* Copy all counters */
for (i = 0; i < ocelot->num_stats; i++)
@@ -1759,6 +1768,43 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
}
EXPORT_SYMBOL(ocelot_get_sset_count);
+static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
+{
+ struct ocelot_stats_region *region = NULL;
+ unsigned int last;
+ int i;
+
+ INIT_LIST_HEAD(&ocelot->stats_regions);
+
+ for (i = 0; i < ocelot->num_stats; i++) {
+ if (region && ocelot->stats_layout[i].offset == last + 1) {
+ region->count++;
+ } else {
+ region = devm_kzalloc(ocelot->dev, sizeof(*region),
+ GFP_KERNEL);
+ if (!region)
+ return -ENOMEM;
+
+ region->offset = ocelot->stats_layout[i].offset;
+ region->count = 1;
+ list_add_tail(®ion->node, &ocelot->stats_regions);
+ }
+
+ last = ocelot->stats_layout[i].offset;
+ }
+
+ list_for_each_entry(region, &ocelot->stats_regions, node) {
+ region->buf = devm_kzalloc(ocelot->dev,
+ region->count * sizeof(*region->buf),
+ GFP_KERNEL);
+
+ if (!region->buf)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
int ocelot_get_ts_info(struct ocelot *ocelot, int port,
struct ethtool_ts_info *info)
{
@@ -2763,6 +2809,10 @@ int ocelot_init(struct ocelot *ocelot)
ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
ANA_CPUQ_8021_CFG, i);
+ ret = ocelot_prepare_stats_regions(ocelot);
+ if (ret)
+ return ret;
+
INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
OCELOT_STATS_CHECK_DELAY);
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 6f74015032fc..1f11c27bf29f 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -542,6 +542,13 @@ struct ocelot_stat_layout {
char name[ETH_GSTRING_LEN];
};
+struct ocelot_stats_region {
+ struct list_head node;
+ u32 offset;
+ int count;
+ u32 *buf;
+};
+
enum ocelot_tag_prefix {
OCELOT_TAG_PREFIX_DISABLED = 0,
OCELOT_TAG_PREFIX_NONE,
@@ -673,6 +680,7 @@ struct ocelot {
struct regmap_field *regfields[REGFIELD_MAX];
const u32 *const *map;
const struct ocelot_stat_layout *stats_layout;
+ struct list_head stats_regions;
unsigned int num_stats;
u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
--
2.25.1
On Sun, 9 Jan 2022 17:06:16 -0800 Colin Foster wrote:
> Ocelot loops over memory regions to gather stats on different ports.
> These regions are mostly continuous, and are ordered. This patch set
> uses that information to break the stats reads into regions that can get
> read in bulk.
>
> The motiviation is for general cleanup, but also for SPI. Performing two
> back-to-back reads on a SPI bus require toggling the CS line, holding,
> re-toggling the CS line, sending 3 address bytes, sending N padding
> bytes, then actually performing the read. Bulk reads could reduce almost
> all of that overhead, but require that the reads are performed via
> regmap_bulk_read.
Looks like this missed our 5.17 PR, please repost in two weeks once the
merge window is open. You can check if net-next is open here:
http://vger.kernel.org/~davem/net-next.html