Hello everyone,
Here is a set of updates for the PPv2 classifier, the main feature being
the support for steering to RSS contexts, to leverage all the available
RSS tables in the controller.
The first two patches are non-critical fixes for the classifier, the
first one prevents us from allocating too much room to store the
classification rules, the second one configuring the C2 engine as
suggested by the PPv2 functionnal specs.
Patches 3 to 5 introduce support for RSS contexts in mvpp2, allowing us
to steer traffic to dedicated RSS tables.
Thanks,
Maxime
Maxime Chevallier (5):
net: mvpp2: cls: Use the correct number of rules in various places
net: mvpp2: cls: Bypass C2 internals FIFOs at init
net: mvpp2: cls: Use RSS contexts to handle RSS tables
net: mvpp2: cls: Extract the RSS context when parsing the ethtool rule
net: mvpp2: cls: Support steering to RSS contexts
drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 20 +-
.../net/ethernet/marvell/mvpp2/mvpp2_cls.c | 272 ++++++++++++++++--
.../net/ethernet/marvell/mvpp2/mvpp2_cls.h | 15 +-
.../net/ethernet/marvell/mvpp2/mvpp2_main.c | 67 ++++-
4 files changed, 326 insertions(+), 48 deletions(-)
--
2.20.1
The PPv2 controller has 8 RSS tables that are shared across all ports on
a given PPv2 instance. The previous implementation allocated one table
per port, leaving others unused.
By using RSS contexts, we can make use of multiple RSS tables per
port, one being the default table (always id 0), the other ones being
used as destinations for flow steering, in the same way as rx rings.
This commit introduces RSS contexts management in the PPv2 driver. We
always reserve one table per port, allocated when the port is probed.
The global table list is stored in the struct mvpp2, as it's a global
resource. Each port then maintains a list of indices in that global
table, that way each port can have it's own numbering scheme starting
from 0.
One limitation that seems unavoidable is that the hashing parameters are
shared across all RSS contexts for a given port. Hashing parameters for
ctx 0 will be applied to all contexts.
Signed-off-by: Maxime Chevallier <[email protected]>
---
drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 16 +-
.../net/ethernet/marvell/mvpp2/mvpp2_cls.c | 200 +++++++++++++++---
.../net/ethernet/marvell/mvpp2/mvpp2_cls.h | 15 +-
.../net/ethernet/marvell/mvpp2/mvpp2_main.c | 65 +++++-
4 files changed, 255 insertions(+), 41 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index bb466af9434b..18ae8d06b692 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -626,6 +626,7 @@
#define MVPP2_N_RFS_RULES (MVPP2_N_RFS_ENTRIES_PER_FLOW * 7)
/* RSS constants */
+#define MVPP22_N_RSS_TABLES 8
#define MVPP22_RSS_TABLE_ENTRIES 32
/* IPv6 max L3 address size */
@@ -727,6 +728,10 @@ enum mvpp2_prs_l3_cast {
/* Definitions */
struct mvpp2_dbgfs_entries;
+struct mvpp2_rss_table {
+ u32 indir[MVPP22_RSS_TABLE_ENTRIES];
+};
+
/* Shared Packet Processor resources */
struct mvpp2 {
/* Shared registers' base addresses */
@@ -790,6 +795,9 @@ struct mvpp2 {
/* Debugfs entries private data */
struct mvpp2_dbgfs_entries *dbgfs_entries;
+
+ /* RSS Indirection tables */
+ struct mvpp2_rss_table *rss_tables[MVPP22_N_RSS_TABLES];
};
struct mvpp2_pcpu_stats {
@@ -921,12 +929,14 @@ struct mvpp2_port {
u32 tx_time_coal;
- /* RSS indirection table */
- u32 indir[MVPP22_RSS_TABLE_ENTRIES];
-
/* List of steering rules active on that port */
struct mvpp2_ethtool_fs *rfs_rules[MVPP2_N_RFS_ENTRIES_PER_FLOW];
int n_rfs_rules;
+
+ /* Each port has its own view of the rss contexts, so that it can number
+ * them from 0
+ */
+ int rss_ctx[MVPP22_N_RSS_TABLES];
};
/* The mvpp2_tx_desc and mvpp2_rx_desc structures describe the
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
index d549e9a29d9a..c16e343ccbbf 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
@@ -969,12 +969,22 @@ u32 mvpp2_cls_c2_hit_count(struct mvpp2 *priv, int c2_index)
return mvpp2_read(priv, MVPP22_CLS_C2_HIT_CTR);
}
-static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port)
+static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port, u32 ctx)
{
struct mvpp2_cls_c2_entry c2;
+ u8 qh, ql;
mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+ /* The RxQ number is used to select the RSS table. It that case, we set
+ * it to be the ctx number.
+ */
+ qh = (ctx >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+ ql = ctx & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+ c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+ MVPP22_CLS_C2_ATTR0_QLOW(ql);
+
c2.attr[2] |= MVPP22_CLS_C2_ATTR2_RSS_EN;
mvpp2_cls_c2_write(port->priv, &c2);
@@ -983,22 +993,45 @@ static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port)
static void mvpp2_rss_port_c2_disable(struct mvpp2_port *port)
{
struct mvpp2_cls_c2_entry c2;
+ u8 qh, ql;
mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+ /* Reset the default destination RxQ to the port's first rx queue. */
+ qh = (port->first_rxq >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+ ql = port->first_rxq & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+ c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+ MVPP22_CLS_C2_ATTR0_QLOW(ql);
+
c2.attr[2] &= ~MVPP22_CLS_C2_ATTR2_RSS_EN;
mvpp2_cls_c2_write(port->priv, &c2);
}
-void mvpp22_port_rss_enable(struct mvpp2_port *port)
+static inline int mvpp22_rss_ctx(struct mvpp2_port *port, int port_rss_ctx)
{
- mvpp2_rss_port_c2_enable(port);
+ return port->rss_ctx[port_rss_ctx];
}
-void mvpp22_port_rss_disable(struct mvpp2_port *port)
+int mvpp22_port_rss_enable(struct mvpp2_port *port)
{
+ if (mvpp22_rss_ctx(port, 0) < 0)
+ return -EINVAL;
+
+ mvpp2_rss_port_c2_enable(port, mvpp22_rss_ctx(port, 0));
+
+ return 0;
+}
+
+int mvpp22_port_rss_disable(struct mvpp2_port *port)
+{
+ if (mvpp22_rss_ctx(port, 0) < 0)
+ return -EINVAL;
+
mvpp2_rss_port_c2_disable(port);
+
+ return 0;
}
static void mvpp22_port_c2_lookup_disable(struct mvpp2_port *port, int entry)
@@ -1331,19 +1364,136 @@ static inline u32 mvpp22_rxfh_indir(struct mvpp2_port *port, u32 rxq)
return port->first_rxq + ((rxq * nrxqs + rxq / cpus) % port->nrxqs);
}
-void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table)
+static void mvpp22_rss_fill_table(struct mvpp2_port *port,
+ struct mvpp2_rss_table *table,
+ u32 rss_ctx)
{
struct mvpp2 *priv = port->priv;
int i;
for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++) {
- u32 sel = MVPP22_RSS_INDEX_TABLE(table) |
+ u32 sel = MVPP22_RSS_INDEX_TABLE(rss_ctx) |
MVPP22_RSS_INDEX_TABLE_ENTRY(i);
mvpp2_write(priv, MVPP22_RSS_INDEX, sel);
mvpp2_write(priv, MVPP22_RSS_TABLE_ENTRY,
- mvpp22_rxfh_indir(port, port->indir[i]));
+ mvpp22_rxfh_indir(port, table->indir[i]));
+ }
+}
+
+static int mvpp22_rss_context_create(struct mvpp2_port *port, u32 *rss_ctx)
+{
+ struct mvpp2 *priv = port->priv;
+ u32 ctx;
+
+ /* Find the first free RSS table */
+ for (ctx = 0; ctx < MVPP22_N_RSS_TABLES; ctx++) {
+ if (!priv->rss_tables[ctx])
+ break;
+ }
+
+ if (ctx == MVPP22_N_RSS_TABLES)
+ return -EINVAL;
+
+ priv->rss_tables[ctx] = kzalloc(sizeof(*priv->rss_tables[ctx]),
+ GFP_KERNEL);
+ if (!priv->rss_tables[ctx])
+ return -ENOMEM;
+
+ *rss_ctx = ctx;
+
+ /* Set the table width: replace the whole classifier Rx queue number
+ * with the ones configured in RSS table entries.
+ */
+ mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(ctx));
+ mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
+
+ mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(ctx));
+ mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE, MVPP22_RSS_TABLE_POINTER(ctx));
+
+ return 0;
+}
+
+int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 *port_ctx)
+{
+ u32 rss_ctx;
+ int ret, i;
+
+ ret = mvpp22_rss_context_create(port, &rss_ctx);
+ if (ret)
+ return ret;
+
+ /* Find the first available context number in the port, starting from 1.
+ * Context 0 on each port is reserved for the default context.
+ */
+ for (i = 1; i < MVPP22_N_RSS_TABLES; i++) {
+ if (port->rss_ctx[i] < 0)
+ break;
}
+
+ port->rss_ctx[i] = rss_ctx;
+ *port_ctx = i;
+
+ return 0;
+}
+
+static struct mvpp2_rss_table *mvpp22_rss_table_get(struct mvpp2 *priv,
+ int rss_ctx)
+{
+ if (rss_ctx < 0 || rss_ctx >= MVPP22_N_RSS_TABLES)
+ return NULL;
+
+ return priv->rss_tables[rss_ctx];
+}
+
+int mvpp22_port_rss_ctx_delete(struct mvpp2_port *port, u32 port_ctx)
+{
+ struct mvpp2 *priv = port->priv;
+ int rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+
+ if (rss_ctx < 0 || rss_ctx >= MVPP22_N_RSS_TABLES)
+ return -EINVAL;
+
+ kfree(priv->rss_tables[rss_ctx]);
+
+ priv->rss_tables[rss_ctx] = NULL;
+ port->rss_ctx[port_ctx] = -1;
+
+ return 0;
+}
+
+int mvpp22_port_rss_ctx_indir_set(struct mvpp2_port *port, u32 port_ctx,
+ const u32 *indir)
+{
+ int rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+ struct mvpp2_rss_table *rss_table = mvpp22_rss_table_get(port->priv,
+ rss_ctx);
+
+ if (!rss_table)
+ return -EINVAL;
+
+ memcpy(rss_table->indir, indir,
+ MVPP22_RSS_TABLE_ENTRIES * sizeof(rss_table->indir[0]));
+
+ mvpp22_rss_fill_table(port, rss_table, rss_ctx);
+
+ return 0;
+}
+
+int mvpp22_port_rss_ctx_indir_get(struct mvpp2_port *port, u32 port_ctx,
+ u32 *indir)
+{
+ int rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+ struct mvpp2_rss_table *rss_table = mvpp22_rss_table_get(port->priv,
+ rss_ctx);
+
+ if (!rss_table)
+ return -EINVAL;
+
+ memcpy(indir, rss_table->indir,
+ MVPP22_RSS_TABLE_ENTRIES * sizeof(rss_table->indir[0]));
+
+ return 0;
}
int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info)
@@ -1427,32 +1577,32 @@ int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info)
return 0;
}
-void mvpp22_port_rss_init(struct mvpp2_port *port)
+int mvpp22_port_rss_init(struct mvpp2_port *port)
{
- struct mvpp2 *priv = port->priv;
- int i;
+ struct mvpp2_rss_table *table;
+ u32 context = 0;
+ int i, ret;
- /* Set the table width: replace the whole classifier Rx queue number
- * with the ones configured in RSS table entries.
- */
- mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(port->id));
- mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
+ for (i = 0; i < MVPP22_N_RSS_TABLES; i++)
+ port->rss_ctx[i] = -1;
- /* The default RxQ is used as a key to select the RSS table to use.
- * We use one RSS table per port.
- */
- mvpp2_write(priv, MVPP22_RSS_INDEX,
- MVPP22_RSS_INDEX_QUEUE(port->first_rxq));
- mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE,
- MVPP22_RSS_TABLE_POINTER(port->id));
+ ret = mvpp22_rss_context_create(port, &context);
+ if (ret)
+ return ret;
+
+ table = mvpp22_rss_table_get(port->priv, context);
+ if (!table)
+ return -EINVAL;
+
+ port->rss_ctx[0] = context;
/* Configure the first table to evenly distribute the packets across
* real Rx Queues. The table entries map a hash to a port Rx Queue.
*/
for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++)
- port->indir[i] = ethtool_rxfh_indir_default(i, port->nrxqs);
+ table->indir[i] = ethtool_rxfh_indir_default(i, port->nrxqs);
- mvpp22_rss_fill_table(port, port->id);
+ mvpp22_rss_fill_table(port, table, mvpp22_rss_ctx(port, 0));
/* Configure default flows */
mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_IP4, MVPP22_CLS_HEK_IP4_2T);
@@ -1461,4 +1611,6 @@ void mvpp22_port_rss_init(struct mvpp2_port *port)
mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_TCP6, MVPP22_CLS_HEK_IP6_5T);
mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_UDP4, MVPP22_CLS_HEK_IP4_5T);
mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_UDP6, MVPP22_CLS_HEK_IP6_5T);
+
+ return 0;
}
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
index 56b617375a65..26cc1176e758 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
@@ -249,11 +249,18 @@ struct mvpp2_cls_lookup_entry {
u32 data;
};
-void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table);
-void mvpp22_port_rss_init(struct mvpp2_port *port);
+int mvpp22_port_rss_init(struct mvpp2_port *port);
-void mvpp22_port_rss_enable(struct mvpp2_port *port);
-void mvpp22_port_rss_disable(struct mvpp2_port *port);
+int mvpp22_port_rss_enable(struct mvpp2_port *port);
+int mvpp22_port_rss_disable(struct mvpp2_port *port);
+
+int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 *rss_ctx);
+int mvpp22_port_rss_ctx_delete(struct mvpp2_port *port, u32 rss_ctx);
+
+int mvpp22_port_rss_ctx_indir_set(struct mvpp2_port *port, u32 rss_ctx,
+ const u32 *indir);
+int mvpp22_port_rss_ctx_indir_get(struct mvpp2_port *port, u32 rss_ctx,
+ u32 *indir);
int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info);
int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info);
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 8432315447dd..3ed713b8dea5 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -4002,24 +4002,25 @@ static int mvpp2_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
u8 *hfunc)
{
struct mvpp2_port *port = netdev_priv(dev);
+ int ret = 0;
if (!mvpp22_rss_is_supported())
return -EOPNOTSUPP;
if (indir)
- memcpy(indir, port->indir,
- ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
+ ret = mvpp22_port_rss_ctx_indir_get(port, 0, indir);
if (hfunc)
*hfunc = ETH_RSS_HASH_CRC32;
- return 0;
+ return ret;
}
static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
const u8 *key, const u8 hfunc)
{
struct mvpp2_port *port = netdev_priv(dev);
+ int ret = 0;
if (!mvpp22_rss_is_supported())
return -EOPNOTSUPP;
@@ -4030,15 +4031,58 @@ static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
if (key)
return -EOPNOTSUPP;
- if (indir) {
- memcpy(port->indir, indir,
- ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
- mvpp22_rss_fill_table(port, port->id);
- }
+ if (indir)
+ ret = mvpp22_port_rss_ctx_indir_set(port, 0, indir);
- return 0;
+ return ret;
}
+static int mvpp2_ethtool_get_rxfh_context(struct net_device *dev, u32 *indir,
+ u8 *key, u8 *hfunc, u32 rss_context)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ int ret = 0;
+
+ if (!mvpp22_rss_is_supported())
+ return -EOPNOTSUPP;
+
+ if (hfunc)
+ *hfunc = ETH_RSS_HASH_CRC32;
+
+ if (indir)
+ ret = mvpp22_port_rss_ctx_indir_get(port, rss_context, indir);
+
+ return ret;
+}
+
+static int mvpp2_ethtool_set_rxfh_context(struct net_device *dev,
+ const u32 *indir, const u8 *key,
+ const u8 hfunc, u32 *rss_context,
+ bool delete)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ int ret;
+
+ if (!mvpp22_rss_is_supported())
+ return -EOPNOTSUPP;
+
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_CRC32)
+ return -EOPNOTSUPP;
+
+ if (key)
+ return -EOPNOTSUPP;
+
+ if (delete)
+ return mvpp22_port_rss_ctx_delete(port, *rss_context);
+
+ if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
+ ret = mvpp22_port_rss_ctx_create(port, rss_context);
+ if (ret)
+ return ret;
+ }
+
+ return mvpp22_port_rss_ctx_indir_set(port, *rss_context, indir);
+}
/* Device ops */
static const struct net_device_ops mvpp2_netdev_ops = {
@@ -4075,7 +4119,8 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
.get_rxfh_indir_size = mvpp2_ethtool_get_rxfh_indir_size,
.get_rxfh = mvpp2_ethtool_get_rxfh,
.set_rxfh = mvpp2_ethtool_set_rxfh,
-
+ .get_rxfh_context = mvpp2_ethtool_get_rxfh_context,
+ .set_rxfh_context = mvpp2_ethtool_set_rxfh_context,
};
/* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
--
2.20.1
ethtool_rx_flow_rule_create takes into parameter the ethtool flow spec,
which doesn't contain the rss context id. We therefore need to extract
it ourself before parsing the ethtool rule.
The FLOW_RSS flag is only set in info->fs.flow_type, and not
info->flow_type.
Signed-off-by: Maxime Chevallier <[email protected]>
---
drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
index c16e343ccbbf..c1a83e9cb80a 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
@@ -1281,6 +1281,12 @@ int mvpp2_ethtool_cls_rule_ins(struct mvpp2_port *port,
input.fs = &info->fs;
+ /* We need to manually set the rss_ctx, since this info isn't present
+ * in info->fs
+ */
+ if (info->fs.flow_type & FLOW_RSS)
+ input.rss_ctx = info->rss_context;
+
ethtool_rule = ethtool_rx_flow_rule_create(&input);
if (IS_ERR(ethtool_rule)) {
ret = PTR_ERR(ethtool_rule);
--
2.20.1
When steering to an RXQ, we can perform an extra RSS step to assign a
queue from an RSS table.
This is done by setting the RSS_EN attribute in the C2 engine. In that
case, the RXQ that is assigned is the global RSS context id, that is
then translated to an RSS table using the RXQ2RSS table.
An example using ethtool to steer to RXQ 2 and 3 would be :
ethtool -X eth0 weight 0 0 1 1 context new
(This would print the allocated context id, let's say it's 1)
ethtool -N eth0 flow-type udp4 dst-port 1234 context 1 loc 0
The hash parameters are the ones that are globally configured for RSS :
ethtool -N eth0 rx-flow-hash udp4 sdfn
When an RSS context is removed while there are active classification
rules using this context, these rules are removed.
Signed-off-by: Maxime Chevallier <[email protected]>
---
.../net/ethernet/marvell/mvpp2/mvpp2_cls.c | 58 +++++++++++++++++--
1 file changed, 54 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
index c1a83e9cb80a..cd0daad011ce 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
@@ -1068,7 +1068,7 @@ static int mvpp2_port_c2_tcam_rule_add(struct mvpp2_port *port,
struct flow_action_entry *act;
struct mvpp2_cls_c2_entry c2;
u8 qh, ql, pmap;
- int index;
+ int index, ctx;
memset(&c2, 0, sizeof(c2));
@@ -1108,14 +1108,36 @@ static int mvpp2_port_c2_tcam_rule_add(struct mvpp2_port *port,
*/
c2.act = MVPP22_CLS_C2_ACT_COLOR(MVPP22_C2_COL_NO_UPD_LOCK);
+ /* Update RSS status after matching this entry */
+ if (act->queue.ctx)
+ c2.attr[2] |= MVPP22_CLS_C2_ATTR2_RSS_EN;
+
+ /* Always lock the RSS_EN decision. We might have high prio
+ * rules steering to an RXQ, and a lower one steering to RSS,
+ * we don't want the low prio RSS rule overwriting this flag.
+ */
+ c2.act = MVPP22_CLS_C2_ACT_RSS_EN(MVPP22_C2_UPD_LOCK);
+
/* Mark packet as "forwarded to software", needed for RSS */
c2.act |= MVPP22_CLS_C2_ACT_FWD(MVPP22_C2_FWD_SW_LOCK);
c2.act |= MVPP22_CLS_C2_ACT_QHIGH(MVPP22_C2_UPD_LOCK) |
MVPP22_CLS_C2_ACT_QLOW(MVPP22_C2_UPD_LOCK);
- qh = ((act->queue.index + port->first_rxq) >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
- ql = (act->queue.index + port->first_rxq) & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+ if (act->queue.ctx) {
+ /* Get the global ctx number */
+ ctx = mvpp22_rss_ctx(port, act->queue.ctx);
+ if (ctx < 0)
+ return -EINVAL;
+
+ qh = (ctx >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+ ql = ctx & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+ } else {
+ qh = ((act->queue.index + port->first_rxq) >> 3) &
+ MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+ ql = (act->queue.index + port->first_rxq) &
+ MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+ }
c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
MVPP22_CLS_C2_ATTR0_QLOW(ql);
@@ -1235,6 +1257,13 @@ static int mvpp2_cls_rfs_parse_rule(struct mvpp2_rfs_rule *rule)
if (act->id != FLOW_ACTION_QUEUE && act->id != FLOW_ACTION_DROP)
return -EOPNOTSUPP;
+ /* When both an RSS context and an queue index are set, the index
+ * is considered as an offset to be added to the indirection table
+ * entries. We don't support this, so reject this rule.
+ */
+ if (act->queue.ctx && act->queue.index)
+ return -EOPNOTSUPP;
+
/* For now, only use the C2 engine which has a HEK size limited to 64
* bits for TCAM matching.
*/
@@ -1455,11 +1484,32 @@ static struct mvpp2_rss_table *mvpp22_rss_table_get(struct mvpp2 *priv,
int mvpp22_port_rss_ctx_delete(struct mvpp2_port *port, u32 port_ctx)
{
struct mvpp2 *priv = port->priv;
- int rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+ struct ethtool_rxnfc *rxnfc;
+ int i, rss_ctx, ret;
+
+ rss_ctx = mvpp22_rss_ctx(port, port_ctx);
if (rss_ctx < 0 || rss_ctx >= MVPP22_N_RSS_TABLES)
return -EINVAL;
+ /* Invalidate any active classification rule that use this context */
+ for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) {
+ if (!port->rfs_rules[i])
+ continue;
+
+ rxnfc = &port->rfs_rules[i]->rxnfc;
+ if (!(rxnfc->fs.flow_type & FLOW_RSS) ||
+ rxnfc->rss_context != port_ctx)
+ continue;
+
+ ret = mvpp2_ethtool_cls_rule_del(port, rxnfc);
+ if (ret) {
+ netdev_warn(port->dev,
+ "couldn't remove classification rule %d associated to this context",
+ rxnfc->fs.location);
+ }
+ }
+
kfree(priv->rss_tables[rss_ctx]);
priv->rss_tables[rss_ctx] = NULL;
--
2.20.1
As of today, the classification offload implementation only supports 4
different rules to be offloaded. This number has been hardcoded in the
rule insertion function, and the wrong define is being used elsewhere.
Use the correct #define everywhere to make sure we always check for the
correct number of rules.
Signed-off-by: Maxime Chevallier <[email protected]>
---
drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 2 +-
drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c | 4 ++--
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 6171270a016c..d5df813e08c4 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -923,7 +923,7 @@ struct mvpp2_port {
u32 indir[MVPP22_RSS_TABLE_ENTRIES];
/* List of steering rules active on that port */
- struct mvpp2_ethtool_fs *rfs_rules[MVPP2_N_RFS_RULES];
+ struct mvpp2_ethtool_fs *rfs_rules[MVPP2_N_RFS_ENTRIES_PER_FLOW];
int n_rfs_rules;
};
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
index d046f7a1dcf5..9ce73297276e 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
@@ -1212,7 +1212,7 @@ int mvpp2_ethtool_cls_rule_get(struct mvpp2_port *port,
{
struct mvpp2_ethtool_fs *efs;
- if (rxnfc->fs.location >= MVPP2_N_RFS_RULES)
+ if (rxnfc->fs.location >= MVPP2_N_RFS_ENTRIES_PER_FLOW)
return -EINVAL;
efs = port->rfs_rules[rxnfc->fs.location];
@@ -1232,7 +1232,7 @@ int mvpp2_ethtool_cls_rule_ins(struct mvpp2_port *port,
struct mvpp2_ethtool_fs *efs, *old_efs;
int ret = 0;
- if (info->fs.location >= 4 ||
+ if (info->fs.location >= MVPP2_N_RFS_ENTRIES_PER_FLOW ||
info->fs.location < 0)
return -EINVAL;
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index d38952eb7aa9..8432315447dd 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3956,7 +3956,7 @@ static int mvpp2_ethtool_get_rxnfc(struct net_device *dev,
ret = mvpp2_ethtool_cls_rule_get(port, info);
break;
case ETHTOOL_GRXCLSRLALL:
- for (i = 0; i < MVPP2_N_RFS_RULES; i++) {
+ for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) {
if (port->rfs_rules[i])
rules[loc++] = i;
}
--
2.20.1
From: Maxime Chevallier <[email protected]>
Date: Fri, 24 May 2019 12:05:49 +0200
> Here is a set of updates for the PPv2 classifier, the main feature being
> the support for steering to RSS contexts, to leverage all the available
> RSS tables in the controller.
>
> The first two patches are non-critical fixes for the classifier, the
> first one prevents us from allocating too much room to store the
> classification rules, the second one configuring the C2 engine as
> suggested by the PPv2 functionnal specs.
>
> Patches 3 to 5 introduce support for RSS contexts in mvpp2, allowing us
> to steer traffic to dedicated RSS tables.
Series applied, thanks.
On Tue, Apr 14, 2020 at 1:21 AM Maxime Chevallier
<[email protected]> wrote:
>
> The PPv2 controller has 8 RSS tables that are shared across all ports on
> a given PPv2 instance. The previous implementation allocated one table
> per port, leaving others unused.
>
> By using RSS contexts, we can make use of multiple RSS tables per
> port, one being the default table (always id 0), the other ones being
> used as destinations for flow steering, in the same way as rx rings.
>
> This commit introduces RSS contexts management in the PPv2 driver. We
> always reserve one table per port, allocated when the port is probed.
>
> The global table list is stored in the struct mvpp2, as it's a global
> resource. Each port then maintains a list of indices in that global
> table, that way each port can have it's own numbering scheme starting
> from 0.
>
> One limitation that seems unavoidable is that the hashing parameters are
> shared across all RSS contexts for a given port. Hashing parameters for
> ctx 0 will be applied to all contexts.
>
> Signed-off-by: Maxime Chevallier <[email protected]>
Hi all,
I noticed that enabling rxhash blocks the RX on my Macchiatobin. It
works fine with the 10G ports (the RX rate goes 4x up) but it
completely kills the gigabit interface.
# 10G port
root@macchiatobin:~# iperf3 -c 192.168.0.2
Connecting to host 192.168.0.2, port 5201
[ 5] local 192.168.0.1 port 42394 connected to 192.168.0.2 port 5201
[ ID] Interval Transfer Bitrate Retr Cwnd
[ 5] 0.00-1.00 sec 941 MBytes 7.89 Gbits/sec 4030 250 KBytes
[ 5] 1.00-2.00 sec 933 MBytes 7.82 Gbits/sec 4393 240 KBytes
root@macchiatobin:~# ethtool -K eth0 rxhash on
root@macchiatobin:~# iperf3 -c 192.168.0.2
Connecting to host 192.168.0.2, port 5201
[ 5] local 192.168.0.1 port 42398 connected to 192.168.0.2 port 5201
[ ID] Interval Transfer Bitrate Retr Cwnd
[ 5] 0.00-1.00 sec 860 MBytes 7.21 Gbits/sec 428 410 KBytes
[ 5] 1.00-2.00 sec 859 MBytes 7.20 Gbits/sec 185 563 KBytes
# gigabit port
root@macchiatobin:~# iperf3 -c turbo
Connecting to host turbo, port 5201
[ 5] local 192.168.85.42 port 45144 connected to 192.168.85.6 port 5201
[ ID] Interval Transfer Bitrate Retr Cwnd
[ 5] 0.00-1.00 sec 113 MBytes 948 Mbits/sec 0 407 KBytes
[ 5] 1.00-2.00 sec 112 MBytes 942 Mbits/sec 0 428 KBytes
root@macchiatobin:~# ethtool -K eth2 rxhash on
root@macchiatobin:~# iperf3 -c turbo
iperf3: error - unable to connect to server: Resource temporarily unavailable
I've bisected and it seems that this commit causes the issue. I tried
to revert it on nex-next as a second test, but the code has changed a
lot much since, generating too much conflicts.
Can you have a look into this?
Thanks,
--
Matteo Croce
per aspera ad upstream
On Tue, Apr 14, 2020 at 01:43:02AM +0200, Matteo Croce wrote:
> On Tue, Apr 14, 2020 at 1:21 AM Maxime Chevallier
> <[email protected]> wrote:
> >
> > The PPv2 controller has 8 RSS tables that are shared across all ports on
> > a given PPv2 instance. The previous implementation allocated one table
> > per port, leaving others unused.
> >
> > By using RSS contexts, we can make use of multiple RSS tables per
> > port, one being the default table (always id 0), the other ones being
> > used as destinations for flow steering, in the same way as rx rings.
> >
> > This commit introduces RSS contexts management in the PPv2 driver. We
> > always reserve one table per port, allocated when the port is probed.
> >
> > The global table list is stored in the struct mvpp2, as it's a global
> > resource. Each port then maintains a list of indices in that global
> > table, that way each port can have it's own numbering scheme starting
> > from 0.
> >
> > One limitation that seems unavoidable is that the hashing parameters are
> > shared across all RSS contexts for a given port. Hashing parameters for
> > ctx 0 will be applied to all contexts.
> >
> > Signed-off-by: Maxime Chevallier <[email protected]>
>
> Hi all,
>
> I noticed that enabling rxhash blocks the RX on my Macchiatobin. It
> works fine with the 10G ports (the RX rate goes 4x up) but it
> completely kills the gigabit interface.
>
> # 10G port
> root@macchiatobin:~# iperf3 -c 192.168.0.2
> Connecting to host 192.168.0.2, port 5201
> [ 5] local 192.168.0.1 port 42394 connected to 192.168.0.2 port 5201
> [ ID] Interval Transfer Bitrate Retr Cwnd
> [ 5] 0.00-1.00 sec 941 MBytes 7.89 Gbits/sec 4030 250 KBytes
> [ 5] 1.00-2.00 sec 933 MBytes 7.82 Gbits/sec 4393 240 KBytes
> root@macchiatobin:~# ethtool -K eth0 rxhash on
> root@macchiatobin:~# iperf3 -c 192.168.0.2
> Connecting to host 192.168.0.2, port 5201
> [ 5] local 192.168.0.1 port 42398 connected to 192.168.0.2 port 5201
> [ ID] Interval Transfer Bitrate Retr Cwnd
> [ 5] 0.00-1.00 sec 860 MBytes 7.21 Gbits/sec 428 410 KBytes
> [ 5] 1.00-2.00 sec 859 MBytes 7.20 Gbits/sec 185 563 KBytes
>
> # gigabit port
> root@macchiatobin:~# iperf3 -c turbo
> Connecting to host turbo, port 5201
> [ 5] local 192.168.85.42 port 45144 connected to 192.168.85.6 port 5201
> [ ID] Interval Transfer Bitrate Retr Cwnd
> [ 5] 0.00-1.00 sec 113 MBytes 948 Mbits/sec 0 407 KBytes
> [ 5] 1.00-2.00 sec 112 MBytes 942 Mbits/sec 0 428 KBytes
> root@macchiatobin:~# ethtool -K eth2 rxhash on
> root@macchiatobin:~# iperf3 -c turbo
> iperf3: error - unable to connect to server: Resource temporarily unavailable
>
> I've bisected and it seems that this commit causes the issue. I tried
> to revert it on nex-next as a second test, but the code has changed a
> lot much since, generating too much conflicts.
> Can you have a look into this?
This behaviour on eth2 is confirmed here on v5.6. Turning on rxhash
appears to prevent eth2 working.
Maxime, please look into this regression, thanks.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 10.2Mbps down 587kbps up
On Thu, Apr 23, 2020 at 7:00 PM Russell King - ARM Linux admin
<[email protected]> wrote:
>
> On Tue, Apr 14, 2020 at 01:43:02AM +0200, Matteo Croce wrote:
> > On Tue, Apr 14, 2020 at 1:21 AM Maxime Chevallier
> > <[email protected]> wrote:
> > >
> > > The PPv2 controller has 8 RSS tables that are shared across all ports on
> > > a given PPv2 instance. The previous implementation allocated one table
> > > per port, leaving others unused.
> > >
> > > By using RSS contexts, we can make use of multiple RSS tables per
> > > port, one being the default table (always id 0), the other ones being
> > > used as destinations for flow steering, in the same way as rx rings.
> > >
> > > This commit introduces RSS contexts management in the PPv2 driver. We
> > > always reserve one table per port, allocated when the port is probed.
> > >
> > > The global table list is stored in the struct mvpp2, as it's a global
> > > resource. Each port then maintains a list of indices in that global
> > > table, that way each port can have it's own numbering scheme starting
> > > from 0.
> > >
> > > One limitation that seems unavoidable is that the hashing parameters are
> > > shared across all RSS contexts for a given port. Hashing parameters for
> > > ctx 0 will be applied to all contexts.
> > >
> > > Signed-off-by: Maxime Chevallier <[email protected]>
> >
> > Hi all,
> >
> > I noticed that enabling rxhash blocks the RX on my Macchiatobin. It
> > works fine with the 10G ports (the RX rate goes 4x up) but it
> > completely kills the gigabit interface.
> >
> > # 10G port
> > root@macchiatobin:~# iperf3 -c 192.168.0.2
> > Connecting to host 192.168.0.2, port 5201
> > [ 5] local 192.168.0.1 port 42394 connected to 192.168.0.2 port 5201
> > [ ID] Interval Transfer Bitrate Retr Cwnd
> > [ 5] 0.00-1.00 sec 941 MBytes 7.89 Gbits/sec 4030 250 KBytes
> > [ 5] 1.00-2.00 sec 933 MBytes 7.82 Gbits/sec 4393 240 KBytes
> > root@macchiatobin:~# ethtool -K eth0 rxhash on
> > root@macchiatobin:~# iperf3 -c 192.168.0.2
> > Connecting to host 192.168.0.2, port 5201
> > [ 5] local 192.168.0.1 port 42398 connected to 192.168.0.2 port 5201
> > [ ID] Interval Transfer Bitrate Retr Cwnd
> > [ 5] 0.00-1.00 sec 860 MBytes 7.21 Gbits/sec 428 410 KBytes
> > [ 5] 1.00-2.00 sec 859 MBytes 7.20 Gbits/sec 185 563 KBytes
> >
> > # gigabit port
> > root@macchiatobin:~# iperf3 -c turbo
> > Connecting to host turbo, port 5201
> > [ 5] local 192.168.85.42 port 45144 connected to 192.168.85.6 port 5201
> > [ ID] Interval Transfer Bitrate Retr Cwnd
> > [ 5] 0.00-1.00 sec 113 MBytes 948 Mbits/sec 0 407 KBytes
> > [ 5] 1.00-2.00 sec 112 MBytes 942 Mbits/sec 0 428 KBytes
> > root@macchiatobin:~# ethtool -K eth2 rxhash on
> > root@macchiatobin:~# iperf3 -c turbo
> > iperf3: error - unable to connect to server: Resource temporarily unavailable
> >
> > I've bisected and it seems that this commit causes the issue. I tried
> > to revert it on nex-next as a second test, but the code has changed a
> > lot much since, generating too much conflicts.
> > Can you have a look into this?
>
> This behaviour on eth2 is confirmed here on v5.6. Turning on rxhash
> appears to prevent eth2 working.
>
> Maxime, please look into this regression, thanks.
>
> --
> RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line in suburbia: sync at 10.2Mbps down 587kbps up
>
Hi,
What do you think about temporarily disabling it like this?
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -5775,7 +5775,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
NETIF_F_HW_VLAN_CTAG_FILTER;
if (mvpp22_rss_is_supported()) {
- dev->hw_features |= NETIF_F_RXHASH;
+ if (port->phy_interface != PHY_INTERFACE_MODE_SGMII)
+ dev->hw_features |= NETIF_F_RXHASH;
dev->features |= NETIF_F_NTUPLE;
}
David, is this "workaround" too bad to get accepted?
Bye,
--
Matteo Croce
per aspera ad upstream
> -----Original Message-----
> From: Matteo Croce <[email protected]>
> Sent: Saturday, May 9, 2020 3:13 AM
> To: David S . Miller <[email protected]>
> Cc: Maxime Chevallier <[email protected]>; netdev
> <[email protected]>; LKML <[email protected]>; Antoine
> Tenart <[email protected]>; Thomas Petazzoni
> <[email protected]>; [email protected];
> [email protected]; Nadav Haklai <[email protected]>; Stefan
> Chulski <[email protected]>; Marcin Wojtas <[email protected]>; Linux
> ARM <[email protected]>; Russell King - ARM Linux admin
> <[email protected]>
> Subject: [EXT] Re: [PATCH net-next 3/5] net: mvpp2: cls: Use RSS contexts to
> handle RSS tables
>
> External Email
>
> ----------------------------------------------------------------------
> On Thu, Apr 23, 2020 at 7:00 PM Russell King - ARM Linux admin
> <[email protected]> wrote:
> >
> > On Tue, Apr 14, 2020 at 01:43:02AM +0200, Matteo Croce wrote:
> > > On Tue, Apr 14, 2020 at 1:21 AM Maxime Chevallier
> > > <[email protected]> wrote:
> > > >
> > > > The PPv2 controller has 8 RSS tables that are shared across all
> > > > ports on a given PPv2 instance. The previous implementation
> > > > allocated one table per port, leaving others unused.
> > > >
> > > > By using RSS contexts, we can make use of multiple RSS tables per
> > > > port, one being the default table (always id 0), the other ones
> > > > being used as destinations for flow steering, in the same way as rx rings.
> > > >
> > > > This commit introduces RSS contexts management in the PPv2 driver.
> > > > We always reserve one table per port, allocated when the port is probed.
> > > >
> > > > The global table list is stored in the struct mvpp2, as it's a
> > > > global resource. Each port then maintains a list of indices in
> > > > that global table, that way each port can have it's own numbering
> > > > scheme starting from 0.
> > > >
> > > > One limitation that seems unavoidable is that the hashing
> > > > parameters are shared across all RSS contexts for a given port.
> > > > Hashing parameters for ctx 0 will be applied to all contexts.
> > > >
> > > > Signed-off-by: Maxime Chevallier <[email protected]>
> > >
> > > Hi all,
> > >
> > > I noticed that enabling rxhash blocks the RX on my Macchiatobin. It
> > > works fine with the 10G ports (the RX rate goes 4x up) but it
> > > completely kills the gigabit interface.
> > >
> > > # 10G port
> > > root@macchiatobin:~# iperf3 -c 192.168.0.2 Connecting to host
> > > 192.168.0.2, port 5201 [ 5] local 192.168.0.1 port 42394 connected
> > > to 192.168.0.2 port 5201
> > > [ ID] Interval Transfer Bitrate Retr Cwnd
> > > [ 5] 0.00-1.00 sec 941 MBytes 7.89 Gbits/sec 4030 250 KBytes
> > > [ 5] 1.00-2.00 sec 933 MBytes 7.82 Gbits/sec 4393 240 KBytes
> > > root@macchiatobin:~# ethtool -K eth0 rxhash on root@macchiatobin:~#
> > > iperf3 -c 192.168.0.2 Connecting to host 192.168.0.2, port 5201 [
> > > 5] local 192.168.0.1 port 42398 connected to 192.168.0.2 port 5201
> > > [ ID] Interval Transfer Bitrate Retr Cwnd
> > > [ 5] 0.00-1.00 sec 860 MBytes 7.21 Gbits/sec 428 410 KBytes
> > > [ 5] 1.00-2.00 sec 859 MBytes 7.20 Gbits/sec 185 563 KBytes
> > >
> > > # gigabit port
> > > root@macchiatobin:~# iperf3 -c turbo Connecting to host turbo, port
> > > 5201 [ 5] local 192.168.85.42 port 45144 connected to 192.168.85.6
> > > port 5201
> > > [ ID] Interval Transfer Bitrate Retr Cwnd
> > > [ 5] 0.00-1.00 sec 113 MBytes 948 Mbits/sec 0 407 KBytes
> > > [ 5] 1.00-2.00 sec 112 MBytes 942 Mbits/sec 0 428 KBytes
> > > root@macchiatobin:~# ethtool -K eth2 rxhash on root@macchiatobin:~#
> > > iperf3 -c turbo
> > > iperf3: error - unable to connect to server: Resource temporarily
> > > unavailable
> > >
> > > I've bisected and it seems that this commit causes the issue. I
> > > tried to revert it on nex-next as a second test, but the code has
> > > changed a lot much since, generating too much conflicts.
> > > Can you have a look into this?
> >
> > This behaviour on eth2 is confirmed here on v5.6. Turning on rxhash
> > appears to prevent eth2 working.
> >
> > Maxime, please look into this regression, thanks.
> >
> > --
> > RMK's Patch system:
> > https://urldefense.proofpoint.com/v2/url?u=https-3A__www.armlinux.org.
> >
> uk_developer_patches_&d=DwIBaQ&c=nKjWec2b6R0mOyPaz7xtfQ&r=DDQ3dK
> wkTIxK
> > Al6_Bs7GMx4zhJArrXKN2mDMOXGh7lg&m=ntT7WKmzla65VWVPZMCr2-
> 8bTGq4cXdJ1RRL
> > gqFkmUc&s=jhKRohlyU0XtX0U0Rjt6XvJgMKLy_HedaFVSJwGYuD8&e=
> > FTTC broadband for 0.8mile line in suburbia: sync at 10.2Mbps down
> > 587kbps up
> >
>
> Hi,
>
> What do you think about temporarily disabling it like this?
>
> --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> @@ -5775,7 +5775,8 @@ static int mvpp2_port_probe(struct platform_device
> *pdev,
> NETIF_F_HW_VLAN_CTAG_FILTER;
>
> if (mvpp22_rss_is_supported()) {
> - dev->hw_features |= NETIF_F_RXHASH;
> + if (port->phy_interface != PHY_INTERFACE_MODE_SGMII)
> + dev->hw_features |= NETIF_F_RXHASH;
> dev->features |= NETIF_F_NTUPLE;
> }
>
>
> David, is this "workaround" too bad to get accepted?
Not sure that RSS related to physical interface(SGMII), better just remove NETIF_F_RXHASH as "workaround".
Stefan.
On Sat, May 09, 2020 at 11:15:58AM +0000, Stefan Chulski wrote:
>
>
> > -----Original Message-----
> > From: Matteo Croce <[email protected]>
> > Sent: Saturday, May 9, 2020 3:13 AM
> > To: David S . Miller <[email protected]>
> > Cc: Maxime Chevallier <[email protected]>; netdev
> > <[email protected]>; LKML <[email protected]>; Antoine
> > Tenart <[email protected]>; Thomas Petazzoni
> > <[email protected]>; [email protected];
> > [email protected]; Nadav Haklai <[email protected]>; Stefan
> > Chulski <[email protected]>; Marcin Wojtas <[email protected]>; Linux
> > ARM <[email protected]>; Russell King - ARM Linux admin
> > <[email protected]>
> > Subject: [EXT] Re: [PATCH net-next 3/5] net: mvpp2: cls: Use RSS contexts to
> > handle RSS tables
> >
> > Hi,
> >
> > What do you think about temporarily disabling it like this?
> >
> > --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > @@ -5775,7 +5775,8 @@ static int mvpp2_port_probe(struct platform_device
> > *pdev,
> > NETIF_F_HW_VLAN_CTAG_FILTER;
> >
> > if (mvpp22_rss_is_supported()) {
> > - dev->hw_features |= NETIF_F_RXHASH;
> > + if (port->phy_interface != PHY_INTERFACE_MODE_SGMII)
> > + dev->hw_features |= NETIF_F_RXHASH;
> > dev->features |= NETIF_F_NTUPLE;
> > }
> >
> >
> > David, is this "workaround" too bad to get accepted?
>
> Not sure that RSS related to physical interface(SGMII), better just remove NETIF_F_RXHASH as "workaround".
Hmm, I'm not sure this is the right way forward. This patch has the
effect of disabling:
d33ec4525007 ("net: mvpp2: add an RSS classification step for each flow")
but the commit you're pointing at which caused the regression is:
895586d5dc32 ("net: mvpp2: cls: Use RSS contexts to handle RSS tables")
Looking at the timeline here, it looks like Matteo raised the issue
very quickly after the patch was sent on the 14th April, and despite
following up on it, despite me following up on it, bootlin have
remained quiet. For a regression, that's not particularly good, and
doesn't leave many options but to ask davem to revert a commit, or
if possible fix it (which there doesn't seem to be any willingness
for either - maybe it's a feature no one uses on this platform?)
Would reverting the commit you point to as the cause (895586d5dc32)
resolve the problem, and have any advantage over entirely disabling
RSS?
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 10.2Mbps down 587kbps up
Hello,
On Sat, 9 May 2020 12:45:18 +0100
Russell King - ARM Linux admin <[email protected]> wrote:
> Looking at the timeline here, it looks like Matteo raised the issue
> very quickly after the patch was sent on the 14th April, and despite
> following up on it, despite me following up on it, bootlin have
> remained quiet.
Unfortunately, we are no longer actively working on Marvell platform
support at the moment. We might have a look on a best effort basis, but
this is potentially a non-trivial issue, so I'm not sure when we will
have the chance to investigate and fix this.
Best regards,
Thomas
--
Thomas Petazzoni, CTO, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
On Sat, May 9, 2020 at 1:16 PM Stefan Chulski <[email protected]> wrote:
> > Hi,
> >
> > What do you think about temporarily disabling it like this?
> >
> > --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > @@ -5775,7 +5775,8 @@ static int mvpp2_port_probe(struct platform_device
> > *pdev,
> > NETIF_F_HW_VLAN_CTAG_FILTER;
> >
> > if (mvpp22_rss_is_supported()) {
> > - dev->hw_features |= NETIF_F_RXHASH;
> > + if (port->phy_interface != PHY_INTERFACE_MODE_SGMII)
> > + dev->hw_features |= NETIF_F_RXHASH;
> > dev->features |= NETIF_F_NTUPLE;
> > }
> >
> >
> > David, is this "workaround" too bad to get accepted?
>
> Not sure that RSS related to physical interface(SGMII), better just remove NETIF_F_RXHASH as "workaround".
>
> Stefan.
Hi,
The point is that RXHASH works fine on all interfaces, but on the
gigabit one (eth2 usually).
And on the 10 gbit interface is very very effective, the throughput
goes 4x when enabled, so it would be a big drawback to disable it on
all interfaces.
Honestly I don't have any 2.5 gbit hardware to test it on eth3, so I
don't know if rxhash actually only works on the first interface of a
unit (so eth0 and eth1),
or if it just doesn't work on the gigabit one.
If someone could test it on the 2.5 gbit port, this will be helpful.
Regards,
--
Matteo Croce
per aspera ad upstream
> -----Original Message-----
> From: Matteo Croce <[email protected]>
> Sent: Saturday, May 9, 2020 3:16 PM
> To: Stefan Chulski <[email protected]>
> Cc: David S . Miller <[email protected]>; Maxime Chevallier
> <[email protected]>; netdev <[email protected]>; LKML
> <[email protected]>; Antoine Tenart
> <[email protected]>; Thomas Petazzoni
> <[email protected]>; [email protected];
> [email protected]; Nadav Haklai <[email protected]>; Marcin
> Wojtas <[email protected]>; Linux ARM <linux-arm-
> [email protected]>; Russell King - ARM Linux admin
> <[email protected]>
> Subject: Re: [EXT] Re: [PATCH net-next 3/5] net: mvpp2: cls: Use RSS contexts to
> handle RSS tables
>
> On Sat, May 9, 2020 at 1:16 PM Stefan Chulski <[email protected]> wrote:
> > > Hi,
> > >
> > > What do you think about temporarily disabling it like this?
> > >
> > > --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > > +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > > @@ -5775,7 +5775,8 @@ static int mvpp2_port_probe(struct
> > > platform_device *pdev,
> > > NETIF_F_HW_VLAN_CTAG_FILTER;
> > >
> > > if (mvpp22_rss_is_supported()) {
> > > - dev->hw_features |= NETIF_F_RXHASH;
> > > + if (port->phy_interface != PHY_INTERFACE_MODE_SGMII)
> > > + dev->hw_features |= NETIF_F_RXHASH;
> > > dev->features |= NETIF_F_NTUPLE;
> > > }
> > >
> > >
> > > David, is this "workaround" too bad to get accepted?
> >
> > Not sure that RSS related to physical interface(SGMII), better just remove
> NETIF_F_RXHASH as "workaround".
> >
> > Stefan.
>
> Hi,
>
> The point is that RXHASH works fine on all interfaces, but on the gigabit one
> (eth2 usually).
> And on the 10 gbit interface is very very effective, the throughput goes 4x when
> enabled, so it would be a big drawback to disable it on all interfaces.
>
> Honestly I don't have any 2.5 gbit hardware to test it on eth3, so I don't know if
> rxhash actually only works on the first interface of a unit (so eth0 and eth1), or
> if it just doesn't work on the gigabit one.
>
> If someone could test it on the 2.5 gbit port, this will be helpful.
RSS tables is part of Packet Processor IP, not MAC(so it's not related to specific speed). Probably issue exist on specific packet processor ports.
Since RSS work fine on first port of the CP, we can do the following:
if (port-> id == 0)
dev->hw_features |= NETIF_F_RXHASH;
Regards.
On Sat, May 09, 2020 at 02:16:44PM +0200, Thomas Petazzoni wrote:
> Hello,
>
> On Sat, 9 May 2020 12:45:18 +0100
> Russell King - ARM Linux admin <[email protected]> wrote:
>
> > Looking at the timeline here, it looks like Matteo raised the issue
> > very quickly after the patch was sent on the 14th April, and despite
> > following up on it, despite me following up on it, bootlin have
> > remained quiet.
>
> Unfortunately, we are no longer actively working on Marvell platform
> support at the moment. We might have a look on a best effort basis, but
> this is potentially a non-trivial issue, so I'm not sure when we will
> have the chance to investigate and fix this.
That may be the case, but that doesn't excuse the fact that we have a
regression and we need to do something.
Please can you suggest how we resolve this regression prior to
5.7-final?
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 10.2Mbps down 587kbps up
On Sat, 9 May 2020 13:48:43 +0100
Russell King - ARM Linux admin <[email protected]> wrote:
> > Unfortunately, we are no longer actively working on Marvell platform
> > support at the moment. We might have a look on a best effort basis, but
> > this is potentially a non-trivial issue, so I'm not sure when we will
> > have the chance to investigate and fix this.
>
> That may be the case, but that doesn't excuse the fact that we have a
> regression and we need to do something.
Absolutely.
> Please can you suggest how we resolve this regression prior to
> 5.7-final?
Since 5.7 is really close, I would suggest to disable the functionality.
Best regards,
Thomas
--
Thomas Petazzoni, CTO, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
On Sat, May 9, 2020 at 1:45 PM Russell King - ARM Linux admin
<[email protected]> wrote:
>
> On Sat, May 09, 2020 at 11:15:58AM +0000, Stefan Chulski wrote:
> >
> >
> > > -----Original Message-----
> > > From: Matteo Croce <[email protected]>
> > > Sent: Saturday, May 9, 2020 3:13 AM
> > > To: David S . Miller <[email protected]>
> > > Cc: Maxime Chevallier <[email protected]>; netdev
> > > <[email protected]>; LKML <[email protected]>; Antoine
> > > Tenart <[email protected]>; Thomas Petazzoni
> > > <[email protected]>; [email protected];
> > > [email protected]; Nadav Haklai <[email protected]>; Stefan
> > > Chulski <[email protected]>; Marcin Wojtas <[email protected]>; Linux
> > > ARM <[email protected]>; Russell King - ARM Linux admin
> > > <[email protected]>
> > > Subject: [EXT] Re: [PATCH net-next 3/5] net: mvpp2: cls: Use RSS contexts to
> > > handle RSS tables
> > >
> > > Hi,
> > >
> > > What do you think about temporarily disabling it like this?
> > >
> > > --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > > +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > > @@ -5775,7 +5775,8 @@ static int mvpp2_port_probe(struct platform_device
> > > *pdev,
> > > NETIF_F_HW_VLAN_CTAG_FILTER;
> > >
> > > if (mvpp22_rss_is_supported()) {
> > > - dev->hw_features |= NETIF_F_RXHASH;
> > > + if (port->phy_interface != PHY_INTERFACE_MODE_SGMII)
> > > + dev->hw_features |= NETIF_F_RXHASH;
> > > dev->features |= NETIF_F_NTUPLE;
> > > }
> > >
> > >
> > > David, is this "workaround" too bad to get accepted?
> >
> > Not sure that RSS related to physical interface(SGMII), better just remove NETIF_F_RXHASH as "workaround".
>
> Hmm, I'm not sure this is the right way forward. This patch has the
> effect of disabling:
>
> d33ec4525007 ("net: mvpp2: add an RSS classification step for each flow")
>
> but the commit you're pointing at which caused the regression is:
>
> 895586d5dc32 ("net: mvpp2: cls: Use RSS contexts to handle RSS tables")
>
>
Hi,
When git bisect pointed to 895586d5dc32 ("net: mvpp2: cls: Use RSS
contexts to handle RSS tables"), which was merged
almost an year after d33ec4525007 ("net: mvpp2: add an RSS
classification step for each flow"), so I assume that between these
two commits either the feature was working or it was disable and we
didn't notice
Without knowing what was happening, which commit should my Fixes tag point to?
Regards,
--
Matteo Croce
per aspera ad upstream
On Sat, May 09, 2020 at 12:31:21PM +0000, Stefan Chulski wrote:
> > -----Original Message-----
> > From: Matteo Croce <[email protected]>
> > Sent: Saturday, May 9, 2020 3:16 PM
> > To: Stefan Chulski <[email protected]>
> > Cc: David S . Miller <[email protected]>; Maxime Chevallier
> > <[email protected]>; netdev <[email protected]>; LKML
> > <[email protected]>; Antoine Tenart
> > <[email protected]>; Thomas Petazzoni
> > <[email protected]>; [email protected];
> > [email protected]; Nadav Haklai <[email protected]>; Marcin
> > Wojtas <[email protected]>; Linux ARM <linux-arm-
> > [email protected]>; Russell King - ARM Linux admin
> > <[email protected]>
> > Subject: Re: [EXT] Re: [PATCH net-next 3/5] net: mvpp2: cls: Use RSS contexts to
> > handle RSS tables
> >
> > Hi,
> >
> > The point is that RXHASH works fine on all interfaces, but on the gigabit one
> > (eth2 usually).
> > And on the 10 gbit interface is very very effective, the throughput goes 4x when
> > enabled, so it would be a big drawback to disable it on all interfaces.
> >
> > Honestly I don't have any 2.5 gbit hardware to test it on eth3, so I don't know if
> > rxhash actually only works on the first interface of a unit (so eth0 and eth1), or
> > if it just doesn't work on the gigabit one.
> >
> > If someone could test it on the 2.5 gbit port, this will be helpful.
>
> RSS tables is part of Packet Processor IP, not MAC(so it's not related to specific speed). Probably issue exist on specific packet processor ports.
> Since RSS work fine on first port of the CP, we can do the following:
> if (port-> id == 0)
> dev->hw_features |= NETIF_F_RXHASH;
I can confirm that Macchiatobin Single Shot eth0 port works with a
1G Fibre SFP or 10G DA SFP with or without rxhash on.
So it seems Stefan's hunch that it is port related is correct.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 10.2Mbps down 587kbps up
On Sat, May 09, 2020 at 03:14:05PM +0200, Matteo Croce wrote:
> On Sat, May 9, 2020 at 1:45 PM Russell King - ARM Linux admin
> <[email protected]> wrote:
> >
> > On Sat, May 09, 2020 at 11:15:58AM +0000, Stefan Chulski wrote:
> > >
> > >
> > > > -----Original Message-----
> > > > From: Matteo Croce <[email protected]>
> > > > Sent: Saturday, May 9, 2020 3:13 AM
> > > > To: David S . Miller <[email protected]>
> > > > Cc: Maxime Chevallier <[email protected]>; netdev
> > > > <[email protected]>; LKML <[email protected]>; Antoine
> > > > Tenart <[email protected]>; Thomas Petazzoni
> > > > <[email protected]>; [email protected];
> > > > [email protected]; Nadav Haklai <[email protected]>; Stefan
> > > > Chulski <[email protected]>; Marcin Wojtas <[email protected]>; Linux
> > > > ARM <[email protected]>; Russell King - ARM Linux admin
> > > > <[email protected]>
> > > > Subject: [EXT] Re: [PATCH net-next 3/5] net: mvpp2: cls: Use RSS contexts to
> > > > handle RSS tables
> > > >
> > > > Hi,
> > > >
> > > > What do you think about temporarily disabling it like this?
> > > >
> > > > --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > > > +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > > > @@ -5775,7 +5775,8 @@ static int mvpp2_port_probe(struct platform_device
> > > > *pdev,
> > > > NETIF_F_HW_VLAN_CTAG_FILTER;
> > > >
> > > > if (mvpp22_rss_is_supported()) {
> > > > - dev->hw_features |= NETIF_F_RXHASH;
> > > > + if (port->phy_interface != PHY_INTERFACE_MODE_SGMII)
> > > > + dev->hw_features |= NETIF_F_RXHASH;
> > > > dev->features |= NETIF_F_NTUPLE;
> > > > }
> > > >
> > > >
> > > > David, is this "workaround" too bad to get accepted?
> > >
> > > Not sure that RSS related to physical interface(SGMII), better just remove NETIF_F_RXHASH as "workaround".
> >
> > Hmm, I'm not sure this is the right way forward. This patch has the
> > effect of disabling:
> >
> > d33ec4525007 ("net: mvpp2: add an RSS classification step for each flow")
> >
> > but the commit you're pointing at which caused the regression is:
> >
> > 895586d5dc32 ("net: mvpp2: cls: Use RSS contexts to handle RSS tables")
> >
> >
>
> Hi,
>
> When git bisect pointed to 895586d5dc32 ("net: mvpp2: cls: Use RSS
> contexts to handle RSS tables"), which was merged
> almost an year after d33ec4525007 ("net: mvpp2: add an RSS
> classification step for each flow"), so I assume that between these
> two commits either the feature was working or it was disable and we
> didn't notice
>
> Without knowing what was happening, which commit should my Fixes tag point to?
Let me make sure that I get this clear:
- Prior to 895586d5dc32, you can turn on and off rxhash without issue
on any port.
- After 895586d5dc32, turning rxhash on eth2 prevents reception.
Prior to 895586d5dc32, with rxhash on, it looks like hashing using
CRC32 is supported but only one context. So, if it's possible to
enable rxhash on any port on the mcbin without 895586d5dc32, and the
port continues to work, I'd say the bug was introduced by
895586d5dc32.
Of course, that would be reinforced if there was a measurable
difference in performance due to rxhash on each port.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 10.2Mbps down 587kbps up
On Sat, May 09, 2020 at 02:51:05PM +0100, Russell King - ARM Linux admin wrote:
> On Sat, May 09, 2020 at 03:14:05PM +0200, Matteo Croce wrote:
> > On Sat, May 9, 2020 at 1:45 PM Russell King - ARM Linux admin
> > <[email protected]> wrote:
> > >
> > > On Sat, May 09, 2020 at 11:15:58AM +0000, Stefan Chulski wrote:
> > > >
> > > >
> > > > > -----Original Message-----
> > > > > From: Matteo Croce <[email protected]>
> > > > > Sent: Saturday, May 9, 2020 3:13 AM
> > > > > To: David S . Miller <[email protected]>
> > > > > Cc: Maxime Chevallier <[email protected]>; netdev
> > > > > <[email protected]>; LKML <[email protected]>; Antoine
> > > > > Tenart <[email protected]>; Thomas Petazzoni
> > > > > <[email protected]>; [email protected];
> > > > > [email protected]; Nadav Haklai <[email protected]>; Stefan
> > > > > Chulski <[email protected]>; Marcin Wojtas <[email protected]>; Linux
> > > > > ARM <[email protected]>; Russell King - ARM Linux admin
> > > > > <[email protected]>
> > > > > Subject: [EXT] Re: [PATCH net-next 3/5] net: mvpp2: cls: Use RSS contexts to
> > > > > handle RSS tables
> > > > >
> > > > > Hi,
> > > > >
> > > > > What do you think about temporarily disabling it like this?
> > > > >
> > > > > --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > > > > +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > > > > @@ -5775,7 +5775,8 @@ static int mvpp2_port_probe(struct platform_device
> > > > > *pdev,
> > > > > NETIF_F_HW_VLAN_CTAG_FILTER;
> > > > >
> > > > > if (mvpp22_rss_is_supported()) {
> > > > > - dev->hw_features |= NETIF_F_RXHASH;
> > > > > + if (port->phy_interface != PHY_INTERFACE_MODE_SGMII)
> > > > > + dev->hw_features |= NETIF_F_RXHASH;
> > > > > dev->features |= NETIF_F_NTUPLE;
> > > > > }
> > > > >
> > > > >
> > > > > David, is this "workaround" too bad to get accepted?
> > > >
> > > > Not sure that RSS related to physical interface(SGMII), better just remove NETIF_F_RXHASH as "workaround".
> > >
> > > Hmm, I'm not sure this is the right way forward. This patch has the
> > > effect of disabling:
> > >
> > > d33ec4525007 ("net: mvpp2: add an RSS classification step for each flow")
> > >
> > > but the commit you're pointing at which caused the regression is:
> > >
> > > 895586d5dc32 ("net: mvpp2: cls: Use RSS contexts to handle RSS tables")
> > >
> > >
> >
> > Hi,
> >
> > When git bisect pointed to 895586d5dc32 ("net: mvpp2: cls: Use RSS
> > contexts to handle RSS tables"), which was merged
> > almost an year after d33ec4525007 ("net: mvpp2: add an RSS
> > classification step for each flow"), so I assume that between these
> > two commits either the feature was working or it was disable and we
> > didn't notice
> >
> > Without knowing what was happening, which commit should my Fixes tag point to?
>
> Let me make sure that I get this clear:
>
> - Prior to 895586d5dc32, you can turn on and off rxhash without issue
> on any port.
> - After 895586d5dc32, turning rxhash on eth2 prevents reception.
>
> Prior to 895586d5dc32, with rxhash on, it looks like hashing using
> CRC32 is supported but only one context. So, if it's possible to
> enable rxhash on any port on the mcbin without 895586d5dc32, and the
> port continues to work, I'd say the bug was introduced by
> 895586d5dc32.
>
> Of course, that would be reinforced if there was a measurable
> difference in performance due to rxhash on each port.
I've just run this test, but I can detect no difference in performance
with or without 895586d5dc32 on eth0 or eth2 on the mcbin (apart from
eth2 stopping working with 895586d5dc32 applied.) I tested this by
reverting almost all changes to the mvpp2 driver between 5.6 and that
commit.
That's not too surprising; I'm using my cex7 platform with the Mellanox
card in for one end of the 10G link, and that platform doesn't seem to
be able to saturdate a 10G link - it only seems to manage around 4Gbps.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 10.2Mbps down 587kbps up
On Sat, May 9, 2020 at 4:49 PM Russell King - ARM Linux admin
<[email protected]> wrote:
>
> On Sat, May 09, 2020 at 02:51:05PM +0100, Russell King - ARM Linux admin wrote:
> > On Sat, May 09, 2020 at 03:14:05PM +0200, Matteo Croce wrote:
> > > On Sat, May 9, 2020 at 1:45 PM Russell King - ARM Linux admin
> > > <[email protected]> wrote:
> > > >
> > > > On Sat, May 09, 2020 at 11:15:58AM +0000, Stefan Chulski wrote:
> > > > >
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Matteo Croce <[email protected]>
> > > > > > Sent: Saturday, May 9, 2020 3:13 AM
> > > > > > To: David S . Miller <[email protected]>
> > > > > > Cc: Maxime Chevallier <[email protected]>; netdev
> > > > > > <[email protected]>; LKML <[email protected]>; Antoine
> > > > > > Tenart <[email protected]>; Thomas Petazzoni
> > > > > > <[email protected]>; [email protected];
> > > > > > [email protected]; Nadav Haklai <[email protected]>; Stefan
> > > > > > Chulski <[email protected]>; Marcin Wojtas <[email protected]>; Linux
> > > > > > ARM <[email protected]>; Russell King - ARM Linux admin
> > > > > > <[email protected]>
> > > > > > Subject: [EXT] Re: [PATCH net-next 3/5] net: mvpp2: cls: Use RSS contexts to
> > > > > > handle RSS tables
> > > > > >
> > > > > > Hi,
> > > > > >
> > > > > > What do you think about temporarily disabling it like this?
> > > > > >
> > > > > > --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > > > > > +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > > > > > @@ -5775,7 +5775,8 @@ static int mvpp2_port_probe(struct platform_device
> > > > > > *pdev,
> > > > > > NETIF_F_HW_VLAN_CTAG_FILTER;
> > > > > >
> > > > > > if (mvpp22_rss_is_supported()) {
> > > > > > - dev->hw_features |= NETIF_F_RXHASH;
> > > > > > + if (port->phy_interface != PHY_INTERFACE_MODE_SGMII)
> > > > > > + dev->hw_features |= NETIF_F_RXHASH;
> > > > > > dev->features |= NETIF_F_NTUPLE;
> > > > > > }
> > > > > >
> > > > > >
> > > > > > David, is this "workaround" too bad to get accepted?
> > > > >
> > > > > Not sure that RSS related to physical interface(SGMII), better just remove NETIF_F_RXHASH as "workaround".
> > > >
> > > > Hmm, I'm not sure this is the right way forward. This patch has the
> > > > effect of disabling:
> > > >
> > > > d33ec4525007 ("net: mvpp2: add an RSS classification step for each flow")
> > > >
> > > > but the commit you're pointing at which caused the regression is:
> > > >
> > > > 895586d5dc32 ("net: mvpp2: cls: Use RSS contexts to handle RSS tables")
> > > >
> > > >
> > >
> > > Hi,
> > >
> > > When git bisect pointed to 895586d5dc32 ("net: mvpp2: cls: Use RSS
> > > contexts to handle RSS tables"), which was merged
> > > almost an year after d33ec4525007 ("net: mvpp2: add an RSS
> > > classification step for each flow"), so I assume that between these
> > > two commits either the feature was working or it was disable and we
> > > didn't notice
> > >
> > > Without knowing what was happening, which commit should my Fixes tag point to?
> >
> > Let me make sure that I get this clear:
> >
> > - Prior to 895586d5dc32, you can turn on and off rxhash without issue
> > on any port.
> > - After 895586d5dc32, turning rxhash on eth2 prevents reception.
> >
> > Prior to 895586d5dc32, with rxhash on, it looks like hashing using
> > CRC32 is supported but only one context. So, if it's possible to
> > enable rxhash on any port on the mcbin without 895586d5dc32, and the
> > port continues to work, I'd say the bug was introduced by
> > 895586d5dc32.
> >
> > Of course, that would be reinforced if there was a measurable
> > difference in performance due to rxhash on each port.
>
> I've just run this test, but I can detect no difference in performance
> with or without 895586d5dc32 on eth0 or eth2 on the mcbin (apart from
> eth2 stopping working with 895586d5dc32 applied.) I tested this by
> reverting almost all changes to the mvpp2 driver between 5.6 and that
> commit.
>
> That's not too surprising; I'm using my cex7 platform with the Mellanox
> card in for one end of the 10G link, and that platform doesn't seem to
> be able to saturdate a 10G link - it only seems to manage around 4Gbps.
>
> --
> RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line in suburbia: sync at 10.2Mbps down 587kbps up
>
Well it depends on the traffic type. I used to generate 5k flows with
T-Rex and an Intel X710 card.
This way t-rex changes the UDP port of every packet:
root@macchiatobin:~# tcpdump -tnni eth0
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), capture size 262144 bytes
IP 16.0.0.18.9874 > 48.0.0.81.2001: UDP, length 18
IP 16.0.0.248.56289 > 48.0.192.56.2001: UDP, length 18
IP 16.0.0.154.44965 > 48.0.128.26.2001: UDP, length 18
IP 16.0.0.23.31363 > 48.0.0.86.2001: UDP, length 18
IP 16.0.0.192.1674 > 48.0.192.63.2001: UDP, length 18
IP 16.0.0.155.62370 > 48.0.128.27.2001: UDP, length 18
IP 16.0.0.30.22126 > 48.0.0.93.2001: UDP, length 18
IP 16.0.0.195.51329 > 48.0.192.66.2001: UDP, length 18
IP 16.0.0.160.18323 > 48.0.128.32.2001: UDP, length 18
IP 16.0.0.199.55413 > 48.0.192.70.2001: UDP, length 18
And here RX hash gives a huge performance gain.
root@macchiatobin:~# utraf eth0
tx: 0 bps 0 pps rx: 425.5 Mbps 886.5 Kpps
tx: 0 bps 0 pps rx: 426.0 Mbps 887.6 Kpps
tx: 0 bps 0 pps rx: 425.3 Mbps 886.1 Kpps
tx: 0 bps 0 pps rx: 425.2 Mbps 885.8 Kpps
root@macchiatobin:~# ethtool -K eth0 rxhash on
root@macchiatobin:~# utraf eth0
tx: 0 bps 0 pps rx: 1595 Mbps 3323 Kpps
tx: 0 bps 0 pps rx: 1593 Mbps 3319 Kpps
tx: 0 bps 0 pps rx: 1595 Mbps 3323 Kpps
tx: 0 bps 0 pps rx: 1594 Mbps 3320 Kpps
utraf is just a tool which reads netlink statistics, packets are
dropped with a tc rule.
Regards,
--
Matteo Croce
per aspera ad upstream
On Sat, May 09, 2020 at 03:14:05PM +0200, Matteo Croce wrote:
> Hi,
>
> When git bisect pointed to 895586d5dc32 ("net: mvpp2: cls: Use RSS
> contexts to handle RSS tables"), which was merged
> almost an year after d33ec4525007 ("net: mvpp2: add an RSS
> classification step for each flow"), so I assume that between these
> two commits either the feature was working or it was disable and we
> didn't notice
>
> Without knowing what was happening, which commit should my Fixes tag point to?
It is highly likely that 895586d5dc32 is responsible for this breakage.
I've been investigating this afternoon, and what I've found, comparing
a kernel without 895586d5dc32 and with 895586d5dc32 applied is:
- The table programmed into the hardware via mvpp22_rss_fill_table()
appears to be identical with or without the commit.
- When rxhash is enabled on eth2, mvpp2_rss_port_c2_enable() reports
that c2.attr[0] and c2.attr[2] are written back containing:
- with 895586d5dc32, failing: 00200000 40000000
- without 895586d5dc32, working: 04000000 40000000
- When disabling rxhash, c2.attr[0] and c2.attr[2] are written back as:
04000000 00000000
The second value represents the MVPP22_CLS_C2_ATTR2_RSS_EN bit, the
first value is the queue number, which comprises two fields. The high
5 bits are 24:29 and the low three are 21:23 inclusive. This comes
from:
c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
MVPP22_CLS_C2_ATTR0_QLOW(ql);
#define MVPP22_CLS_C2_ATTR0_QHIGH(qh) (((qh) & 0x1f) << 24)
#define MVPP22_CLS_C2_ATTR0_QLOW(ql) (((ql) & 0x7) << 21)
So, the working case gives eth2 a queue id of 4.0, or 32 as per
port->first_rxq, and the non-working case a queue id of 0.1, or 1.
The allocation of queue IDs seems to be in mvpp2_port_probe():
if (priv->hw_version == MVPP21)
port->first_rxq = port->id * port->nrxqs;
else
port->first_rxq = port->id * priv->max_port_rxqs;
Where:
if (priv->hw_version == MVPP21)
priv->max_port_rxqs = 8;
else
priv->max_port_rxqs = 32;
Making the port 0 (eth0 / eth1) have port->first_rxq = 0, and port 1
(eth2) be 32. It seems the idea is that the first 32 queues belong to
port 0, the second 32 queues belong to port 1, etc.
mvpp2_rss_port_c2_enable() gets the queue number from it's parameter,
'ctx', which comes from mvpp22_rss_ctx(port, 0). This returns
port->rss_ctx[0].
mvpp22_rss_context_create() is responsible for allocating that, which
it does by looking for an unallocated priv->rss_tables[] pointer. This
table is shared amongst all ports on the CP silicon.
When we write the tables in mvpp22_rss_fill_table(), the RSS table
entry is defined by:
u32 sel = MVPP22_RSS_INDEX_TABLE(rss_ctx) |
MVPP22_RSS_INDEX_TABLE_ENTRY(i);
where rss_ctx is the context ID (queue number) and i is the index in
the table.
#define MVPP22_RSS_INDEX_TABLE_ENTRY(idx) (idx)
#define MVPP22_RSS_INDEX_TABLE(idx) ((idx) << 8)
#define MVPP22_RSS_INDEX_QUEUE(idx) ((idx) << 16)
If we look at what is written:
- The first table to be written has "sel" values of 00000000..0000001f,
containing values 0..3. This appears to be for eth1. This is table 0,
RX queue number 0.
- The second table has "sel" values of 00000100..0000011f, and appears
to be for eth2. These contain values 0x20..0x23. This is table 1,
RX queue number 0.
- The third table has "sel" values of 00000200..0000021f, and appears
to be for eth3. These contain values 0x40..0x43. This is table 2,
RX queue number 0.
Okay, so how do queue numbers translate to the RSS table? There is
another table - the RXQ2RSS table, indexed by the MVPP22_RSS_INDEX_QUEUE
field of MVPP22_RSS_INDEX and accessed through the MVPP22_RXQ2RSS_TABLE
register. Before 895586d5dc32, it was:
mvpp2_write(priv, MVPP22_RSS_INDEX,
MVPP22_RSS_INDEX_QUEUE(port->first_rxq));
mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE,
MVPP22_RSS_TABLE_POINTER(port->id));
and after:
mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(ctx));
mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE, MVPP22_RSS_TABLE_POINTER(ctx));
So, before the commit, for eth2, that would've contained '32' for the
index and '1' for the table pointer - mapping queue 32 to table 1.
Remember that this is queue-high.queue-low of 4.0.
After the commit, we appear to map queue 1 to table 1. That again
looks fine on the face of it.
Section 9.3.1 of the A8040 manual seems indicate the reason that the
queue number is separated. queue-low seems to always come from the
classifier, whereas queue-high can be from the ingress physical port
number or the classifier depending on the MVPP2_CLS_SWFWD_PCTRL_REG.
We set the port bit in MVPP2_CLS_SWFWD_PCTRL_REG, meaning that queue-high
comes from the MVPP2_CLS_SWFWD_P2HQ_REG() register... and this seems to
be where our bug comes from.
mvpp2_cls_oversize_rxq_set() sets this up as:
mvpp2_write(port->priv, MVPP2_CLS_SWFWD_P2HQ_REG(port->id),
(port->first_rxq >> MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS));
val = mvpp2_read(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG);
val |= MVPP2_CLS_SWFWD_PCTRL_MASK(port->id);
mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val);
so, the queue-high for eth2 is _always_ 4, meaning that only queues
32 through 39 inclusive are available to eth2. Yet, we're trying to
tell the classifier to set queue-high, which will be ignored, to zero.
So we end up directing traffic from eth2 not to queue 1, but to queue
33, and then we tell it to look up queue 33 in the RSS table. However,
RSS table has not been programmed for queue 33, and so it ends up
(presumably) dropping the packets.
It seems that mvpp22_rss_context_create() doesn't take account of the
fact that the upper 5 bits of the queue ID can't actually be changed
due to the settings in mvpp2_cls_oversize_rxq_set(), _or_ it seems
that mvpp2_cls_oversize_rxq_set() has been missed in this commit.
Either way, these two functions mutually disagree with what queue
number should be used.
So, 895586d5dc32 is indeed the cause of this problem.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 10.2Mbps down 587kbps up
On Sat, May 09, 2020 at 08:52:46PM +0100, Russell King - ARM Linux admin wrote:
> On Sat, May 09, 2020 at 03:14:05PM +0200, Matteo Croce wrote:
> > Hi,
> >
> > When git bisect pointed to 895586d5dc32 ("net: mvpp2: cls: Use RSS
> > contexts to handle RSS tables"), which was merged
> > almost an year after d33ec4525007 ("net: mvpp2: add an RSS
> > classification step for each flow"), so I assume that between these
> > two commits either the feature was working or it was disable and we
> > didn't notice
> >
> > Without knowing what was happening, which commit should my Fixes tag point to?
>
> It is highly likely that 895586d5dc32 is responsible for this breakage.
> I've been investigating this afternoon, and what I've found, comparing
> a kernel without 895586d5dc32 and with 895586d5dc32 applied is:
>
> - The table programmed into the hardware via mvpp22_rss_fill_table()
> appears to be identical with or without the commit.
>
> - When rxhash is enabled on eth2, mvpp2_rss_port_c2_enable() reports
> that c2.attr[0] and c2.attr[2] are written back containing:
>
> - with 895586d5dc32, failing: 00200000 40000000
> - without 895586d5dc32, working: 04000000 40000000
>
> - When disabling rxhash, c2.attr[0] and c2.attr[2] are written back as:
>
> 04000000 00000000
>
> The second value represents the MVPP22_CLS_C2_ATTR2_RSS_EN bit, the
> first value is the queue number, which comprises two fields. The high
> 5 bits are 24:29 and the low three are 21:23 inclusive. This comes
> from:
>
> c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
> MVPP22_CLS_C2_ATTR0_QLOW(ql);
> #define MVPP22_CLS_C2_ATTR0_QHIGH(qh) (((qh) & 0x1f) << 24)
> #define MVPP22_CLS_C2_ATTR0_QLOW(ql) (((ql) & 0x7) << 21)
>
> So, the working case gives eth2 a queue id of 4.0, or 32 as per
> port->first_rxq, and the non-working case a queue id of 0.1, or 1.
>
> The allocation of queue IDs seems to be in mvpp2_port_probe():
>
> if (priv->hw_version == MVPP21)
> port->first_rxq = port->id * port->nrxqs;
> else
> port->first_rxq = port->id * priv->max_port_rxqs;
>
> Where:
>
> if (priv->hw_version == MVPP21)
> priv->max_port_rxqs = 8;
> else
> priv->max_port_rxqs = 32;
>
> Making the port 0 (eth0 / eth1) have port->first_rxq = 0, and port 1
> (eth2) be 32. It seems the idea is that the first 32 queues belong to
> port 0, the second 32 queues belong to port 1, etc.
>
> mvpp2_rss_port_c2_enable() gets the queue number from it's parameter,
> 'ctx', which comes from mvpp22_rss_ctx(port, 0). This returns
> port->rss_ctx[0].
>
> mvpp22_rss_context_create() is responsible for allocating that, which
> it does by looking for an unallocated priv->rss_tables[] pointer. This
> table is shared amongst all ports on the CP silicon.
>
> When we write the tables in mvpp22_rss_fill_table(), the RSS table
> entry is defined by:
>
> u32 sel = MVPP22_RSS_INDEX_TABLE(rss_ctx) |
> MVPP22_RSS_INDEX_TABLE_ENTRY(i);
>
> where rss_ctx is the context ID (queue number) and i is the index in
> the table.
>
> #define MVPP22_RSS_INDEX_TABLE_ENTRY(idx) (idx)
> #define MVPP22_RSS_INDEX_TABLE(idx) ((idx) << 8)
> #define MVPP22_RSS_INDEX_QUEUE(idx) ((idx) << 16)
>
> If we look at what is written:
>
> - The first table to be written has "sel" values of 00000000..0000001f,
> containing values 0..3. This appears to be for eth1. This is table 0,
> RX queue number 0.
> - The second table has "sel" values of 00000100..0000011f, and appears
> to be for eth2. These contain values 0x20..0x23. This is table 1,
> RX queue number 0.
> - The third table has "sel" values of 00000200..0000021f, and appears
> to be for eth3. These contain values 0x40..0x43. This is table 2,
> RX queue number 0.
>
> Okay, so how do queue numbers translate to the RSS table? There is
> another table - the RXQ2RSS table, indexed by the MVPP22_RSS_INDEX_QUEUE
> field of MVPP22_RSS_INDEX and accessed through the MVPP22_RXQ2RSS_TABLE
> register. Before 895586d5dc32, it was:
>
> mvpp2_write(priv, MVPP22_RSS_INDEX,
> MVPP22_RSS_INDEX_QUEUE(port->first_rxq));
> mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE,
> MVPP22_RSS_TABLE_POINTER(port->id));
>
> and after:
>
> mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(ctx));
> mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE, MVPP22_RSS_TABLE_POINTER(ctx));
>
> So, before the commit, for eth2, that would've contained '32' for the
> index and '1' for the table pointer - mapping queue 32 to table 1.
> Remember that this is queue-high.queue-low of 4.0.
>
> After the commit, we appear to map queue 1 to table 1. That again
> looks fine on the face of it.
>
> Section 9.3.1 of the A8040 manual seems indicate the reason that the
> queue number is separated. queue-low seems to always come from the
> classifier, whereas queue-high can be from the ingress physical port
> number or the classifier depending on the MVPP2_CLS_SWFWD_PCTRL_REG.
>
> We set the port bit in MVPP2_CLS_SWFWD_PCTRL_REG, meaning that queue-high
> comes from the MVPP2_CLS_SWFWD_P2HQ_REG() register... and this seems to
> be where our bug comes from.
>
> mvpp2_cls_oversize_rxq_set() sets this up as:
>
> mvpp2_write(port->priv, MVPP2_CLS_SWFWD_P2HQ_REG(port->id),
> (port->first_rxq >> MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS));
>
> val = mvpp2_read(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG);
> val |= MVPP2_CLS_SWFWD_PCTRL_MASK(port->id);
> mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val);
>
> so, the queue-high for eth2 is _always_ 4, meaning that only queues
> 32 through 39 inclusive are available to eth2. Yet, we're trying to
> tell the classifier to set queue-high, which will be ignored, to zero.
>
> So we end up directing traffic from eth2 not to queue 1, but to queue
> 33, and then we tell it to look up queue 33 in the RSS table. However,
> RSS table has not been programmed for queue 33, and so it ends up
> (presumably) dropping the packets.
>
> It seems that mvpp22_rss_context_create() doesn't take account of the
> fact that the upper 5 bits of the queue ID can't actually be changed
> due to the settings in mvpp2_cls_oversize_rxq_set(), _or_ it seems
> that mvpp2_cls_oversize_rxq_set() has been missed in this commit.
> Either way, these two functions mutually disagree with what queue
> number should be used.
>
> So, 895586d5dc32 is indeed the cause of this problem.
Looking deeper into what mvpp2_cls_oversize_rxq_set() and the MTU
validation is doing, it seems that MVPP2_CLS_SWFWD_P2HQ_REG() is
used for at least a couple of things.
So, with the classifier having had RSS enabled and directing eth2
traffic to queue 1, we can not ignore the fact that we may have
packets appearing on queue 32 for this port.
One of the things that queue 32 will be used for is if an over-sized
packet attempts to egress through eth2 - it seems that the A8040 has
the ability to forward frames between its ports. However, afaik we
don't support that feature, and the kernel restricts the packet size,
so we should never violate the MTU validator and end up with such a
packet. In any case, _if_ we were to attempt to transmit an oversized
packet, we have no support in the kernel to deal with that appearing
in the port's receive queue.
Maybe it would be safe to clear the MVPP2_CLS_SWFWD_PCTRL_MASK() bit?
My testing seems to confirm my findings above - clearing this bit
means that if I enable rxhash on eth2, the interface can then pass
traffic, as we are now directing traffic to RX queue 1 rather than
queue 33. Traffic still seems to work with rxhash off as well.
So, I think it's clear where the problem lies, but not what the correct
solution is; someone with more experience of packet classifiers (this
one?) needs to look at this - this is my first venture into these
things, and certainly the first time I've traced through how this is
trying to work (or not)...
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 10.2Mbps down 587kbps up
On Sat, May 09, 2020 at 09:20:50PM +0100, Russell King - ARM Linux admin wrote:
> On Sat, May 09, 2020 at 08:52:46PM +0100, Russell King - ARM Linux admin wrote:
> > It is highly likely that 895586d5dc32 is responsible for this breakage.
> > I've been investigating this afternoon, and what I've found, comparing
> > a kernel without 895586d5dc32 and with 895586d5dc32 applied is:
> >
> > - The table programmed into the hardware via mvpp22_rss_fill_table()
> > appears to be identical with or without the commit.
> >
> > - When rxhash is enabled on eth2, mvpp2_rss_port_c2_enable() reports
> > that c2.attr[0] and c2.attr[2] are written back containing:
> >
> > - with 895586d5dc32, failing: 00200000 40000000
> > - without 895586d5dc32, working: 04000000 40000000
> >
> > - When disabling rxhash, c2.attr[0] and c2.attr[2] are written back as:
> >
> > 04000000 00000000
> >
> > The second value represents the MVPP22_CLS_C2_ATTR2_RSS_EN bit, the
> > first value is the queue number, which comprises two fields. The high
> > 5 bits are 24:29 and the low three are 21:23 inclusive. This comes
> > from:
> >
> > c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
> > MVPP22_CLS_C2_ATTR0_QLOW(ql);
> > #define MVPP22_CLS_C2_ATTR0_QHIGH(qh) (((qh) & 0x1f) << 24)
> > #define MVPP22_CLS_C2_ATTR0_QLOW(ql) (((ql) & 0x7) << 21)
> >
> > So, the working case gives eth2 a queue id of 4.0, or 32 as per
> > port->first_rxq, and the non-working case a queue id of 0.1, or 1.
> >
> > The allocation of queue IDs seems to be in mvpp2_port_probe():
> >
> > if (priv->hw_version == MVPP21)
> > port->first_rxq = port->id * port->nrxqs;
> > else
> > port->first_rxq = port->id * priv->max_port_rxqs;
> >
> > Where:
> >
> > if (priv->hw_version == MVPP21)
> > priv->max_port_rxqs = 8;
> > else
> > priv->max_port_rxqs = 32;
> >
> > Making the port 0 (eth0 / eth1) have port->first_rxq = 0, and port 1
> > (eth2) be 32. It seems the idea is that the first 32 queues belong to
> > port 0, the second 32 queues belong to port 1, etc.
> >
> > mvpp2_rss_port_c2_enable() gets the queue number from it's parameter,
> > 'ctx', which comes from mvpp22_rss_ctx(port, 0). This returns
> > port->rss_ctx[0].
> >
> > mvpp22_rss_context_create() is responsible for allocating that, which
> > it does by looking for an unallocated priv->rss_tables[] pointer. This
> > table is shared amongst all ports on the CP silicon.
> >
> > When we write the tables in mvpp22_rss_fill_table(), the RSS table
> > entry is defined by:
> >
> > u32 sel = MVPP22_RSS_INDEX_TABLE(rss_ctx) |
> > MVPP22_RSS_INDEX_TABLE_ENTRY(i);
> >
> > where rss_ctx is the context ID (queue number) and i is the index in
> > the table.
> >
> > #define MVPP22_RSS_INDEX_TABLE_ENTRY(idx) (idx)
> > #define MVPP22_RSS_INDEX_TABLE(idx) ((idx) << 8)
> > #define MVPP22_RSS_INDEX_QUEUE(idx) ((idx) << 16)
> >
> > If we look at what is written:
> >
> > - The first table to be written has "sel" values of 00000000..0000001f,
> > containing values 0..3. This appears to be for eth1. This is table 0,
> > RX queue number 0.
> > - The second table has "sel" values of 00000100..0000011f, and appears
> > to be for eth2. These contain values 0x20..0x23. This is table 1,
> > RX queue number 0.
> > - The third table has "sel" values of 00000200..0000021f, and appears
> > to be for eth3. These contain values 0x40..0x43. This is table 2,
> > RX queue number 0.
> >
> > Okay, so how do queue numbers translate to the RSS table? There is
> > another table - the RXQ2RSS table, indexed by the MVPP22_RSS_INDEX_QUEUE
> > field of MVPP22_RSS_INDEX and accessed through the MVPP22_RXQ2RSS_TABLE
> > register. Before 895586d5dc32, it was:
> >
> > mvpp2_write(priv, MVPP22_RSS_INDEX,
> > MVPP22_RSS_INDEX_QUEUE(port->first_rxq));
> > mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE,
> > MVPP22_RSS_TABLE_POINTER(port->id));
> >
> > and after:
> >
> > mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(ctx));
> > mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE, MVPP22_RSS_TABLE_POINTER(ctx));
> >
> > So, before the commit, for eth2, that would've contained '32' for the
> > index and '1' for the table pointer - mapping queue 32 to table 1.
> > Remember that this is queue-high.queue-low of 4.0.
> >
> > After the commit, we appear to map queue 1 to table 1. That again
> > looks fine on the face of it.
> >
> > Section 9.3.1 of the A8040 manual seems indicate the reason that the
> > queue number is separated. queue-low seems to always come from the
> > classifier, whereas queue-high can be from the ingress physical port
> > number or the classifier depending on the MVPP2_CLS_SWFWD_PCTRL_REG.
> >
> > We set the port bit in MVPP2_CLS_SWFWD_PCTRL_REG, meaning that queue-high
> > comes from the MVPP2_CLS_SWFWD_P2HQ_REG() register... and this seems to
> > be where our bug comes from.
> >
> > mvpp2_cls_oversize_rxq_set() sets this up as:
> >
> > mvpp2_write(port->priv, MVPP2_CLS_SWFWD_P2HQ_REG(port->id),
> > (port->first_rxq >> MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS));
> >
> > val = mvpp2_read(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG);
> > val |= MVPP2_CLS_SWFWD_PCTRL_MASK(port->id);
> > mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val);
> >
> > so, the queue-high for eth2 is _always_ 4, meaning that only queues
> > 32 through 39 inclusive are available to eth2. Yet, we're trying to
> > tell the classifier to set queue-high, which will be ignored, to zero.
> >
> > So we end up directing traffic from eth2 not to queue 1, but to queue
> > 33, and then we tell it to look up queue 33 in the RSS table. However,
> > RSS table has not been programmed for queue 33, and so it ends up
> > (presumably) dropping the packets.
> >
> > It seems that mvpp22_rss_context_create() doesn't take account of the
> > fact that the upper 5 bits of the queue ID can't actually be changed
> > due to the settings in mvpp2_cls_oversize_rxq_set(), _or_ it seems
> > that mvpp2_cls_oversize_rxq_set() has been missed in this commit.
> > Either way, these two functions mutually disagree with what queue
> > number should be used.
> >
> > So, 895586d5dc32 is indeed the cause of this problem.
>
> Looking deeper into what mvpp2_cls_oversize_rxq_set() and the MTU
> validation is doing, it seems that MVPP2_CLS_SWFWD_P2HQ_REG() is
> used for at least a couple of things.
>
> So, with the classifier having had RSS enabled and directing eth2
> traffic to queue 1, we can not ignore the fact that we may have
> packets appearing on queue 32 for this port.
>
> One of the things that queue 32 will be used for is if an over-sized
> packet attempts to egress through eth2 - it seems that the A8040 has
> the ability to forward frames between its ports. However, afaik we
> don't support that feature, and the kernel restricts the packet size,
> so we should never violate the MTU validator and end up with such a
> packet. In any case, _if_ we were to attempt to transmit an oversized
> packet, we have no support in the kernel to deal with that appearing
> in the port's receive queue.
>
> Maybe it would be safe to clear the MVPP2_CLS_SWFWD_PCTRL_MASK() bit?
>
> My testing seems to confirm my findings above - clearing this bit
> means that if I enable rxhash on eth2, the interface can then pass
> traffic, as we are now directing traffic to RX queue 1 rather than
> queue 33. Traffic still seems to work with rxhash off as well.
>
> So, I think it's clear where the problem lies, but not what the correct
> solution is; someone with more experience of packet classifiers (this
> one?) needs to look at this - this is my first venture into these
> things, and certainly the first time I've traced through how this is
> trying to work (or not)...
This is what I was using here to work around the problem, and what I
mentioned above.
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
index fd221d88811e..0dd3b65822dd 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
@@ -1058,7 +1058,7 @@ void mvpp2_cls_oversize_rxq_set(struct mvpp2_port *port)
(port->first_rxq >> MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS));
val = mvpp2_read(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG);
- val |= MVPP2_CLS_SWFWD_PCTRL_MASK(port->id);
+ val &= ~MVPP2_CLS_SWFWD_PCTRL_MASK(port->id);
mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val);
}
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC for 0.8m (est. 1762m) line in suburbia: sync at 13.1Mbps down 424kbps up
On Tue, May 19, 2020 at 11:54 AM Russell King - ARM Linux admin
<[email protected]> wrote:
>
> On Sat, May 09, 2020 at 09:20:50PM +0100, Russell King - ARM Linux admin wrote:
> > On Sat, May 09, 2020 at 08:52:46PM +0100, Russell King - ARM Linux admin wrote:
> > > It is highly likely that 895586d5dc32 is responsible for this breakage.
> > > I've been investigating this afternoon, and what I've found, comparing
> > > a kernel without 895586d5dc32 and with 895586d5dc32 applied is:
> > >
> > > - The table programmed into the hardware via mvpp22_rss_fill_table()
> > > appears to be identical with or without the commit.
> > >
> > > - When rxhash is enabled on eth2, mvpp2_rss_port_c2_enable() reports
> > > that c2.attr[0] and c2.attr[2] are written back containing:
> > >
> > > - with 895586d5dc32, failing: 00200000 40000000
> > > - without 895586d5dc32, working: 04000000 40000000
> > >
> > > - When disabling rxhash, c2.attr[0] and c2.attr[2] are written back as:
> > >
> > > 04000000 00000000
> > >
> > > The second value represents the MVPP22_CLS_C2_ATTR2_RSS_EN bit, the
> > > first value is the queue number, which comprises two fields. The high
> > > 5 bits are 24:29 and the low three are 21:23 inclusive. This comes
> > > from:
> > >
> > > c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
> > > MVPP22_CLS_C2_ATTR0_QLOW(ql);
> > > #define MVPP22_CLS_C2_ATTR0_QHIGH(qh) (((qh) & 0x1f) << 24)
> > > #define MVPP22_CLS_C2_ATTR0_QLOW(ql) (((ql) & 0x7) << 21)
> > >
> > > So, the working case gives eth2 a queue id of 4.0, or 32 as per
> > > port->first_rxq, and the non-working case a queue id of 0.1, or 1.
> > >
> > > The allocation of queue IDs seems to be in mvpp2_port_probe():
> > >
> > > if (priv->hw_version == MVPP21)
> > > port->first_rxq = port->id * port->nrxqs;
> > > else
> > > port->first_rxq = port->id * priv->max_port_rxqs;
> > >
> > > Where:
> > >
> > > if (priv->hw_version == MVPP21)
> > > priv->max_port_rxqs = 8;
> > > else
> > > priv->max_port_rxqs = 32;
> > >
> > > Making the port 0 (eth0 / eth1) have port->first_rxq = 0, and port 1
> > > (eth2) be 32. It seems the idea is that the first 32 queues belong to
> > > port 0, the second 32 queues belong to port 1, etc.
> > >
> > > mvpp2_rss_port_c2_enable() gets the queue number from it's parameter,
> > > 'ctx', which comes from mvpp22_rss_ctx(port, 0). This returns
> > > port->rss_ctx[0].
> > >
> > > mvpp22_rss_context_create() is responsible for allocating that, which
> > > it does by looking for an unallocated priv->rss_tables[] pointer. This
> > > table is shared amongst all ports on the CP silicon.
> > >
> > > When we write the tables in mvpp22_rss_fill_table(), the RSS table
> > > entry is defined by:
> > >
> > > u32 sel = MVPP22_RSS_INDEX_TABLE(rss_ctx) |
> > > MVPP22_RSS_INDEX_TABLE_ENTRY(i);
> > >
> > > where rss_ctx is the context ID (queue number) and i is the index in
> > > the table.
> > >
> > > #define MVPP22_RSS_INDEX_TABLE_ENTRY(idx) (idx)
> > > #define MVPP22_RSS_INDEX_TABLE(idx) ((idx) << 8)
> > > #define MVPP22_RSS_INDEX_QUEUE(idx) ((idx) << 16)
> > >
> > > If we look at what is written:
> > >
> > > - The first table to be written has "sel" values of 00000000..0000001f,
> > > containing values 0..3. This appears to be for eth1. This is table 0,
> > > RX queue number 0.
> > > - The second table has "sel" values of 00000100..0000011f, and appears
> > > to be for eth2. These contain values 0x20..0x23. This is table 1,
> > > RX queue number 0.
> > > - The third table has "sel" values of 00000200..0000021f, and appears
> > > to be for eth3. These contain values 0x40..0x43. This is table 2,
> > > RX queue number 0.
> > >
> > > Okay, so how do queue numbers translate to the RSS table? There is
> > > another table - the RXQ2RSS table, indexed by the MVPP22_RSS_INDEX_QUEUE
> > > field of MVPP22_RSS_INDEX and accessed through the MVPP22_RXQ2RSS_TABLE
> > > register. Before 895586d5dc32, it was:
> > >
> > > mvpp2_write(priv, MVPP22_RSS_INDEX,
> > > MVPP22_RSS_INDEX_QUEUE(port->first_rxq));
> > > mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE,
> > > MVPP22_RSS_TABLE_POINTER(port->id));
> > >
> > > and after:
> > >
> > > mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(ctx));
> > > mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE, MVPP22_RSS_TABLE_POINTER(ctx));
> > >
> > > So, before the commit, for eth2, that would've contained '32' for the
> > > index and '1' for the table pointer - mapping queue 32 to table 1.
> > > Remember that this is queue-high.queue-low of 4.0.
> > >
> > > After the commit, we appear to map queue 1 to table 1. That again
> > > looks fine on the face of it.
> > >
> > > Section 9.3.1 of the A8040 manual seems indicate the reason that the
> > > queue number is separated. queue-low seems to always come from the
> > > classifier, whereas queue-high can be from the ingress physical port
> > > number or the classifier depending on the MVPP2_CLS_SWFWD_PCTRL_REG.
> > >
> > > We set the port bit in MVPP2_CLS_SWFWD_PCTRL_REG, meaning that queue-high
> > > comes from the MVPP2_CLS_SWFWD_P2HQ_REG() register... and this seems to
> > > be where our bug comes from.
> > >
> > > mvpp2_cls_oversize_rxq_set() sets this up as:
> > >
> > > mvpp2_write(port->priv, MVPP2_CLS_SWFWD_P2HQ_REG(port->id),
> > > (port->first_rxq >> MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS));
> > >
> > > val = mvpp2_read(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG);
> > > val |= MVPP2_CLS_SWFWD_PCTRL_MASK(port->id);
> > > mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val);
> > >
> > > so, the queue-high for eth2 is _always_ 4, meaning that only queues
> > > 32 through 39 inclusive are available to eth2. Yet, we're trying to
> > > tell the classifier to set queue-high, which will be ignored, to zero.
> > >
> > > So we end up directing traffic from eth2 not to queue 1, but to queue
> > > 33, and then we tell it to look up queue 33 in the RSS table. However,
> > > RSS table has not been programmed for queue 33, and so it ends up
> > > (presumably) dropping the packets.
> > >
> > > It seems that mvpp22_rss_context_create() doesn't take account of the
> > > fact that the upper 5 bits of the queue ID can't actually be changed
> > > due to the settings in mvpp2_cls_oversize_rxq_set(), _or_ it seems
> > > that mvpp2_cls_oversize_rxq_set() has been missed in this commit.
> > > Either way, these two functions mutually disagree with what queue
> > > number should be used.
> > >
> > > So, 895586d5dc32 is indeed the cause of this problem.
> >
> > Looking deeper into what mvpp2_cls_oversize_rxq_set() and the MTU
> > validation is doing, it seems that MVPP2_CLS_SWFWD_P2HQ_REG() is
> > used for at least a couple of things.
> >
> > So, with the classifier having had RSS enabled and directing eth2
> > traffic to queue 1, we can not ignore the fact that we may have
> > packets appearing on queue 32 for this port.
> >
> > One of the things that queue 32 will be used for is if an over-sized
> > packet attempts to egress through eth2 - it seems that the A8040 has
> > the ability to forward frames between its ports. However, afaik we
> > don't support that feature, and the kernel restricts the packet size,
> > so we should never violate the MTU validator and end up with such a
> > packet. In any case, _if_ we were to attempt to transmit an oversized
> > packet, we have no support in the kernel to deal with that appearing
> > in the port's receive queue.
> >
> > Maybe it would be safe to clear the MVPP2_CLS_SWFWD_PCTRL_MASK() bit?
> >
> > My testing seems to confirm my findings above - clearing this bit
> > means that if I enable rxhash on eth2, the interface can then pass
> > traffic, as we are now directing traffic to RX queue 1 rather than
> > queue 33. Traffic still seems to work with rxhash off as well.
> >
> > So, I think it's clear where the problem lies, but not what the correct
> > solution is; someone with more experience of packet classifiers (this
> > one?) needs to look at this - this is my first venture into these
> > things, and certainly the first time I've traced through how this is
> > trying to work (or not)...
>
> This is what I was using here to work around the problem, and what I
> mentioned above.
>
> diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
> index fd221d88811e..0dd3b65822dd 100644
> --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
> +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
> @@ -1058,7 +1058,7 @@ void mvpp2_cls_oversize_rxq_set(struct mvpp2_port *port)
> (port->first_rxq >> MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS));
>
> val = mvpp2_read(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG);
> - val |= MVPP2_CLS_SWFWD_PCTRL_MASK(port->id);
> + val &= ~MVPP2_CLS_SWFWD_PCTRL_MASK(port->id);
> mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val);
> }
>
>
Hi,
I will try this change and let you know if it works.
Thanks
--
Matteo Croce
per aspera ad upstream
On Tue, 19 May 2020 12:05:20 +0200
Matteo Croce <[email protected]> wrote:
> On Tue, May 19, 2020 at 11:54 AM Russell King - ARM Linux admin
> <[email protected]> wrote:
> >
> > On Sat, May 09, 2020 at 09:20:50PM +0100, Russell King - ARM Linux
> > admin wrote:
> > > On Sat, May 09, 2020 at 08:52:46PM +0100, Russell King - ARM
> > > Linux admin wrote:
> > > > It is highly likely that 895586d5dc32 is responsible for this
> > > > breakage. I've been investigating this afternoon, and what I've
> > > > found, comparing a kernel without 895586d5dc32 and with
> > > > 895586d5dc32 applied is:
> > > >
> > > > - The table programmed into the hardware via
> > > > mvpp22_rss_fill_table() appears to be identical with or without
> > > > the commit.
> > > >
> > > > - When rxhash is enabled on eth2, mvpp2_rss_port_c2_enable()
> > > > reports that c2.attr[0] and c2.attr[2] are written back
> > > > containing:
> > > >
> > > > - with 895586d5dc32, failing: 00200000 40000000
> > > > - without 895586d5dc32, working: 04000000 40000000
> > > >
> > > > - When disabling rxhash, c2.attr[0] and c2.attr[2] are written
> > > > back as:
> > > >
> > > > 04000000 00000000
> > > >
> > > > The second value represents the MVPP22_CLS_C2_ATTR2_RSS_EN bit,
> > > > the first value is the queue number, which comprises two
> > > > fields. The high 5 bits are 24:29 and the low three are 21:23
> > > > inclusive. This comes from:
> > > >
> > > > c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
> > > > MVPP22_CLS_C2_ATTR0_QLOW(ql);
> > > > #define MVPP22_CLS_C2_ATTR0_QHIGH(qh) (((qh) & 0x1f)
> > > > << 24) #define MVPP22_CLS_C2_ATTR0_QLOW(ql) (((ql) &
> > > > 0x7) << 21)
> > > >
> > > > So, the working case gives eth2 a queue id of 4.0, or 32 as per
> > > > port->first_rxq, and the non-working case a queue id of 0.1, or
> > > > 1.
> > > >
> > > > The allocation of queue IDs seems to be in mvpp2_port_probe():
> > > >
> > > > if (priv->hw_version == MVPP21)
> > > > port->first_rxq = port->id * port->nrxqs;
> > > > else
> > > > port->first_rxq = port->id *
> > > > priv->max_port_rxqs;
> > > >
> > > > Where:
> > > >
> > > > if (priv->hw_version == MVPP21)
> > > > priv->max_port_rxqs = 8;
> > > > else
> > > > priv->max_port_rxqs = 32;
> > > >
> > > > Making the port 0 (eth0 / eth1) have port->first_rxq = 0, and
> > > > port 1 (eth2) be 32. It seems the idea is that the first 32
> > > > queues belong to port 0, the second 32 queues belong to port 1,
> > > > etc.
> > > >
> > > > mvpp2_rss_port_c2_enable() gets the queue number from it's
> > > > parameter, 'ctx', which comes from mvpp22_rss_ctx(port, 0).
> > > > This returns port->rss_ctx[0].
> > > >
> > > > mvpp22_rss_context_create() is responsible for allocating that,
> > > > which it does by looking for an unallocated priv->rss_tables[]
> > > > pointer. This table is shared amongst all ports on the CP
> > > > silicon.
> > > >
> > > > When we write the tables in mvpp22_rss_fill_table(), the RSS
> > > > table entry is defined by:
> > > >
> > > > u32 sel = MVPP22_RSS_INDEX_TABLE(rss_ctx) |
> > > > MVPP22_RSS_INDEX_TABLE_ENTRY(i);
> > > >
> > > > where rss_ctx is the context ID (queue number) and i is the
> > > > index in the table.
> > > >
> > > > #define MVPP22_RSS_INDEX_TABLE_ENTRY(idx) (idx)
> > > > #define MVPP22_RSS_INDEX_TABLE(idx) ((idx) << 8)
> > > > #define MVPP22_RSS_INDEX_QUEUE(idx) ((idx) << 16)
> > > >
> > > > If we look at what is written:
> > > >
> > > > - The first table to be written has "sel" values of
> > > > 00000000..0000001f, containing values 0..3. This appears to be
> > > > for eth1. This is table 0, RX queue number 0.
> > > > - The second table has "sel" values of 00000100..0000011f, and
> > > > appears to be for eth2. These contain values 0x20..0x23. This
> > > > is table 1, RX queue number 0.
> > > > - The third table has "sel" values of 00000200..0000021f, and
> > > > appears to be for eth3. These contain values 0x40..0x43. This
> > > > is table 2, RX queue number 0.
> > > >
> > > > Okay, so how do queue numbers translate to the RSS table?
> > > > There is another table - the RXQ2RSS table, indexed by the
> > > > MVPP22_RSS_INDEX_QUEUE field of MVPP22_RSS_INDEX and accessed
> > > > through the MVPP22_RXQ2RSS_TABLE register. Before
> > > > 895586d5dc32, it was:
> > > >
> > > > mvpp2_write(priv, MVPP22_RSS_INDEX,
> > > > MVPP22_RSS_INDEX_QUEUE(port->first_rxq));
> > > > mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE,
> > > > MVPP22_RSS_TABLE_POINTER(port->id));
> > > >
> > > > and after:
> > > >
> > > > mvpp2_write(priv, MVPP22_RSS_INDEX,
> > > > MVPP22_RSS_INDEX_QUEUE(ctx)); mvpp2_write(priv,
> > > > MVPP22_RXQ2RSS_TABLE, MVPP22_RSS_TABLE_POINTER(ctx));
> > > >
> > > > So, before the commit, for eth2, that would've contained '32'
> > > > for the index and '1' for the table pointer - mapping queue 32
> > > > to table 1. Remember that this is queue-high.queue-low of 4.0.
> > > >
> > > > After the commit, we appear to map queue 1 to table 1. That
> > > > again looks fine on the face of it.
> > > >
> > > > Section 9.3.1 of the A8040 manual seems indicate the reason
> > > > that the queue number is separated. queue-low seems to always
> > > > come from the classifier, whereas queue-high can be from the
> > > > ingress physical port number or the classifier depending on the
> > > > MVPP2_CLS_SWFWD_PCTRL_REG.
> > > >
> > > > We set the port bit in MVPP2_CLS_SWFWD_PCTRL_REG, meaning that
> > > > queue-high comes from the MVPP2_CLS_SWFWD_P2HQ_REG()
> > > > register... and this seems to be where our bug comes from.
> > > >
> > > > mvpp2_cls_oversize_rxq_set() sets this up as:
> > > >
> > > > mvpp2_write(port->priv,
> > > > MVPP2_CLS_SWFWD_P2HQ_REG(port->id), (port->first_rxq >>
> > > > MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS));
> > > >
> > > > val = mvpp2_read(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG);
> > > > val |= MVPP2_CLS_SWFWD_PCTRL_MASK(port->id);
> > > > mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val);
> > > >
> > > > so, the queue-high for eth2 is _always_ 4, meaning that only
> > > > queues 32 through 39 inclusive are available to eth2. Yet,
> > > > we're trying to tell the classifier to set queue-high, which
> > > > will be ignored, to zero.
> > > >
> > > > So we end up directing traffic from eth2 not to queue 1, but to
> > > > queue 33, and then we tell it to look up queue 33 in the RSS
> > > > table. However, RSS table has not been programmed for queue
> > > > 33, and so it ends up (presumably) dropping the packets.
> > > >
> > > > It seems that mvpp22_rss_context_create() doesn't take account
> > > > of the fact that the upper 5 bits of the queue ID can't
> > > > actually be changed due to the settings in
> > > > mvpp2_cls_oversize_rxq_set(), _or_ it seems that
> > > > mvpp2_cls_oversize_rxq_set() has been missed in this commit.
> > > > Either way, these two functions mutually disagree with what
> > > > queue number should be used.
> > > >
> > > > So, 895586d5dc32 is indeed the cause of this problem.
> > >
> > > Looking deeper into what mvpp2_cls_oversize_rxq_set() and the MTU
> > > validation is doing, it seems that MVPP2_CLS_SWFWD_P2HQ_REG() is
> > > used for at least a couple of things.
> > >
> > > So, with the classifier having had RSS enabled and directing eth2
> > > traffic to queue 1, we can not ignore the fact that we may have
> > > packets appearing on queue 32 for this port.
> > >
> > > One of the things that queue 32 will be used for is if an
> > > over-sized packet attempts to egress through eth2 - it seems that
> > > the A8040 has the ability to forward frames between its ports.
> > > However, afaik we don't support that feature, and the kernel
> > > restricts the packet size, so we should never violate the MTU
> > > validator and end up with such a packet. In any case, _if_ we
> > > were to attempt to transmit an oversized packet, we have no
> > > support in the kernel to deal with that appearing in the port's
> > > receive queue.
> > >
> > > Maybe it would be safe to clear the MVPP2_CLS_SWFWD_PCTRL_MASK()
> > > bit?
> > >
> > > My testing seems to confirm my findings above - clearing this bit
> > > means that if I enable rxhash on eth2, the interface can then pass
> > > traffic, as we are now directing traffic to RX queue 1 rather than
> > > queue 33. Traffic still seems to work with rxhash off as well.
> > >
> > > So, I think it's clear where the problem lies, but not what the
> > > correct solution is; someone with more experience of packet
> > > classifiers (this one?) needs to look at this - this is my first
> > > venture into these things, and certainly the first time I've
> > > traced through how this is trying to work (or not)...
> >
> > This is what I was using here to work around the problem, and what I
> > mentioned above.
> >
> > diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
> > b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c index
> > fd221d88811e..0dd3b65822dd 100644 ---
> > a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +++
> > b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c @@ -1058,7 +1058,7
> > @@ void mvpp2_cls_oversize_rxq_set(struct mvpp2_port *port)
> > (port->first_rxq >> MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS));
> >
> > val = mvpp2_read(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG);
> > - val |= MVPP2_CLS_SWFWD_PCTRL_MASK(port->id);
> > + val &= ~MVPP2_CLS_SWFWD_PCTRL_MASK(port->id);
> > mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val);
> > }
> >
> >
>
> Hi,
>
> I will try this change and let you know if it works.
>
> Thanks
>
Hi,
The patch seems to work. I'm generating traffic with random MAC and IP
addresses, to have many flows:
# tcpdump -tenni eth2
9a:a9:b1:3a:b1:6b > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.4.0 > 192.168.0.4.0: UDP, length 12
9e:92:fd:f8:7f:0a > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.4.0 > 192.168.0.4.0: UDP, length 12
66:b7:11:8a:c2:1f > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.1.0 > 192.168.0.1.0: UDP, length 12
7a:ba:58:bd:9a:62 > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.1.0 > 192.168.0.1.0: UDP, length 12
7e:78:a9:97:70:3a > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.2.0 > 192.168.0.2.0: UDP, length 12
b2:81:91:34:ce:42 > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.2.0 > 192.168.0.2.0: UDP, length 12
2a:05:52:d0:d9:3f > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.3.0 > 192.168.0.3.0: UDP, length 12
ee:ee:47:35:fa:81 > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.3.0 > 192.168.0.3.0: UDP, length 12
This is the default rate, with rxhash off:
# utraf eth2
tx: 0 bps 0 pps rx: 397.4 Mbps 827.9 Kpps
tx: 0 bps 0 pps rx: 396.3 Mbps 825.7 Kpps
tx: 0 bps 0 pps rx: 396.6 Mbps 826.3 Kpps
tx: 0 bps 0 pps rx: 396.5 Mbps 826.1 Kpps
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
9 root 20 0 0 0 0 R 99.7 0.0 7:02.58 ksoftirqd/0
15 root 20 0 0 0 0 S 0.0 0.0 0:00.00 ksoftirqd/1
20 root 20 0 0 0 0 S 0.0 0.0 2:01.48 ksoftirqd/2
25 root 20 0 0 0 0 S 0.0 0.0 0:32.86 ksoftirqd/3
and this with rx hashing enabled:
# ethtool -K eth2 rxhash on
# utraf eth2
tx: 0 bps 0 pps rx: 456.4 Mbps 950.8 Kpps
tx: 0 bps 0 pps rx: 458.4 Mbps 955.0 Kpps
tx: 0 bps 0 pps rx: 457.6 Mbps 953.3 Kpps
tx: 0 bps 0 pps rx: 462.2 Mbps 962.9 Kpps
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
20 root 20 0 0 0 0 R 0.7 0.0 2:02.34 ksoftirqd/2
25 root 20 0 0 0 0 S 0.3 0.0 0:33.25 ksoftirqd/3
9 root 20 0 0 0 0 S 0.0 0.0 7:52.57 ksoftirqd/0
15 root 20 0 0 0 0 S 0.0 0.0 0:00.00 ksoftirqd/1
The throughput doesn't increase so much, maybe we hit an HW limit of
the gigabit port. The interesting thing is how the global CPU usage
drops from 25% to 1%.
I can't explain this, it could be due to the reduced contention?
Regards,
--
Matteo Croce
per aspera ad upstream
On Tue, May 19, 2020 at 07:05:34PM +0200, Matteo Croce wrote:
> On Tue, 19 May 2020 12:05:20 +0200
> Matteo Croce <[email protected]> wrote:
>
> Hi,
>
> The patch seems to work. I'm generating traffic with random MAC and IP
> addresses, to have many flows:
>
> # tcpdump -tenni eth2
> 9a:a9:b1:3a:b1:6b > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.4.0 > 192.168.0.4.0: UDP, length 12
> 9e:92:fd:f8:7f:0a > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.4.0 > 192.168.0.4.0: UDP, length 12
> 66:b7:11:8a:c2:1f > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.1.0 > 192.168.0.1.0: UDP, length 12
> 7a:ba:58:bd:9a:62 > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.1.0 > 192.168.0.1.0: UDP, length 12
> 7e:78:a9:97:70:3a > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.2.0 > 192.168.0.2.0: UDP, length 12
> b2:81:91:34:ce:42 > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.2.0 > 192.168.0.2.0: UDP, length 12
> 2a:05:52:d0:d9:3f > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.3.0 > 192.168.0.3.0: UDP, length 12
> ee:ee:47:35:fa:81 > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.3.0 > 192.168.0.3.0: UDP, length 12
>
> This is the default rate, with rxhash off:
>
> # utraf eth2
> tx: 0 bps 0 pps rx: 397.4 Mbps 827.9 Kpps
> tx: 0 bps 0 pps rx: 396.3 Mbps 825.7 Kpps
> tx: 0 bps 0 pps rx: 396.6 Mbps 826.3 Kpps
> tx: 0 bps 0 pps rx: 396.5 Mbps 826.1 Kpps
>
> PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
> 9 root 20 0 0 0 0 R 99.7 0.0 7:02.58 ksoftirqd/0
> 15 root 20 0 0 0 0 S 0.0 0.0 0:00.00 ksoftirqd/1
> 20 root 20 0 0 0 0 S 0.0 0.0 2:01.48 ksoftirqd/2
> 25 root 20 0 0 0 0 S 0.0 0.0 0:32.86 ksoftirqd/3
>
> and this with rx hashing enabled:
>
> # ethtool -K eth2 rxhash on
> # utraf eth2
> tx: 0 bps 0 pps rx: 456.4 Mbps 950.8 Kpps
> tx: 0 bps 0 pps rx: 458.4 Mbps 955.0 Kpps
> tx: 0 bps 0 pps rx: 457.6 Mbps 953.3 Kpps
> tx: 0 bps 0 pps rx: 462.2 Mbps 962.9 Kpps
>
> PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
> 20 root 20 0 0 0 0 R 0.7 0.0 2:02.34 ksoftirqd/2
> 25 root 20 0 0 0 0 S 0.3 0.0 0:33.25 ksoftirqd/3
> 9 root 20 0 0 0 0 S 0.0 0.0 7:52.57 ksoftirqd/0
> 15 root 20 0 0 0 0 S 0.0 0.0 0:00.00 ksoftirqd/1
>
>
> The throughput doesn't increase so much, maybe we hit an HW limit of
> the gigabit port. The interesting thing is how the global CPU usage
> drops from 25% to 1%.
> I can't explain this, it could be due to the reduced contention?
Hi Matteo,
Can I take that as a Tested-by ?
Thanks.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC for 0.8m (est. 1762m) line in suburbia: sync at 13.1Mbps down 424kbps up
On Wed, May 20, 2020 at 1:11 PM Russell King - ARM Linux admin
<[email protected]> wrote:
>
> On Tue, May 19, 2020 at 07:05:34PM +0200, Matteo Croce wrote:
> > On Tue, 19 May 2020 12:05:20 +0200
> > Matteo Croce <[email protected]> wrote:
> >
> > Hi,
> >
> > The patch seems to work. I'm generating traffic with random MAC and IP
> > addresses, to have many flows:
> >
> > # tcpdump -tenni eth2
> > 9a:a9:b1:3a:b1:6b > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.4.0 > 192.168.0.4.0: UDP, length 12
> > 9e:92:fd:f8:7f:0a > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.4.0 > 192.168.0.4.0: UDP, length 12
> > 66:b7:11:8a:c2:1f > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.1.0 > 192.168.0.1.0: UDP, length 12
> > 7a:ba:58:bd:9a:62 > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.1.0 > 192.168.0.1.0: UDP, length 12
> > 7e:78:a9:97:70:3a > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.2.0 > 192.168.0.2.0: UDP, length 12
> > b2:81:91:34:ce:42 > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.2.0 > 192.168.0.2.0: UDP, length 12
> > 2a:05:52:d0:d9:3f > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.3.0 > 192.168.0.3.0: UDP, length 12
> > ee:ee:47:35:fa:81 > 00:51:82:11:22:02, ethertype IPv4 (0x0800), length 60: 10.0.0.3.0 > 192.168.0.3.0: UDP, length 12
> >
> > This is the default rate, with rxhash off:
> >
> > # utraf eth2
> > tx: 0 bps 0 pps rx: 397.4 Mbps 827.9 Kpps
> > tx: 0 bps 0 pps rx: 396.3 Mbps 825.7 Kpps
> > tx: 0 bps 0 pps rx: 396.6 Mbps 826.3 Kpps
> > tx: 0 bps 0 pps rx: 396.5 Mbps 826.1 Kpps
> >
> > PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
> > 9 root 20 0 0 0 0 R 99.7 0.0 7:02.58 ksoftirqd/0
> > 15 root 20 0 0 0 0 S 0.0 0.0 0:00.00 ksoftirqd/1
> > 20 root 20 0 0 0 0 S 0.0 0.0 2:01.48 ksoftirqd/2
> > 25 root 20 0 0 0 0 S 0.0 0.0 0:32.86 ksoftirqd/3
> >
> > and this with rx hashing enabled:
> >
> > # ethtool -K eth2 rxhash on
> > # utraf eth2
> > tx: 0 bps 0 pps rx: 456.4 Mbps 950.8 Kpps
> > tx: 0 bps 0 pps rx: 458.4 Mbps 955.0 Kpps
> > tx: 0 bps 0 pps rx: 457.6 Mbps 953.3 Kpps
> > tx: 0 bps 0 pps rx: 462.2 Mbps 962.9 Kpps
> >
> > PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
> > 20 root 20 0 0 0 0 R 0.7 0.0 2:02.34 ksoftirqd/2
> > 25 root 20 0 0 0 0 S 0.3 0.0 0:33.25 ksoftirqd/3
> > 9 root 20 0 0 0 0 S 0.0 0.0 7:52.57 ksoftirqd/0
> > 15 root 20 0 0 0 0 S 0.0 0.0 0:00.00 ksoftirqd/1
> >
> >
> > The throughput doesn't increase so much, maybe we hit an HW limit of
> > the gigabit port. The interesting thing is how the global CPU usage
> > drops from 25% to 1%.
> > I can't explain this, it could be due to the reduced contention?
>
> Hi Matteo,
>
> Can I take that as a Tested-by ?
>
> Thanks.
>
> --
> RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
> FTTC for 0.8m (est. 1762m) line in suburbia: sync at 13.1Mbps down 424kbps up
>
Tested-by: Matteo Croce <[email protected]>
probably also:
Reported-by: Matteo Croce <[email protected]>
Thanks,
--
Matteo Croce
per aspera ad upstream