2017-12-21 12:33:29

by Ioana Ciocoi Radulescu

[permalink] [raw]
Subject: [PATCH 1/2] staging: fsl-dpaa2/eth: Flow affinity for IP forwarding

The driver xmit function chooses an egress FQ based on the current
core id. The network stack itself sets a queue mapping field in the
skb based on several criteria (by default, a hash on packet fields),
but the driver currently ignores that mapping.

This patch saves the ingress frame flow affinity information in the
skb. In case of forwarded frames, this info will then be used for Tx
and Tx confirmation hardware queue selection, ensuring all processing
of the given frame is done on a single core.

Signed-off-by: Bogdan Purcareata <[email protected]>
Signed-off-by: Ioana Radulescu <[email protected]>
---
drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.c | 16 ++++++++++------
drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.h | 3 ++-
2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.c b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.c
index 7f3e4fa..b63ae09 100644
--- a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.c
+++ b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.c
@@ -230,7 +230,8 @@ static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv,
static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
struct dpaa2_eth_channel *ch,
const struct dpaa2_fd *fd,
- struct napi_struct *napi)
+ struct napi_struct *napi,
+ u16 queue_id)
{
dma_addr_t addr = dpaa2_fd_get_addr(fd);
u8 fd_format = dpaa2_fd_get_format(fd);
@@ -281,6 +282,7 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
}

skb->protocol = eth_type_trans(skb, priv->net_dev);
+ skb_record_rx_queue(skb, queue_id);

percpu_stats->rx_packets++;
percpu_stats->rx_bytes += dpaa2_fd_get_len(fd);
@@ -325,7 +327,7 @@ static int consume_frames(struct dpaa2_eth_channel *ch)
fq = (struct dpaa2_eth_fq *)dpaa2_dq_fqd_ctx(dq);
fq->stats.frames++;

- fq->consume(priv, ch, fd, &ch->napi);
+ fq->consume(priv, ch, fd, &ch->napi, fq->flowid);
cleaned++;
} while (!is_last);

@@ -588,10 +590,11 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
/* Tracing point */
trace_dpaa2_tx_fd(net_dev, &fd);

- /* TxConf FQ selection primarily based on cpu affinity; this is
- * non-migratable context, so it's safe to call smp_processor_id().
+ /* TxConf FQ selection relies on queue id from the stack.
+ * In case of a forwarded frame from another DPNI interface, we choose
+ * a queue affined to the same core that processed the Rx frame
*/
- queue_mapping = smp_processor_id() % dpaa2_eth_queue_count(priv);
+ queue_mapping = skb_get_queue_mapping(skb);
fq = &priv->fq[queue_mapping];
for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
err = dpaa2_io_service_enqueue_qd(NULL, priv->tx_qdid, 0,
@@ -622,7 +625,8 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv,
struct dpaa2_eth_channel *ch,
const struct dpaa2_fd *fd,
- struct napi_struct *napi __always_unused)
+ struct napi_struct *napi __always_unused,
+ u16 queue_id __always_unused)
{
struct rtnl_link_stats64 *percpu_stats;
struct dpaa2_eth_drv_stats *percpu_extras;
diff --git a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.h b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.h
index d68ac38..fb8fb5c 100644
--- a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.h
+++ b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.h
@@ -276,7 +276,8 @@ struct dpaa2_eth_fq {
void (*consume)(struct dpaa2_eth_priv *,
struct dpaa2_eth_channel *,
const struct dpaa2_fd *,
- struct napi_struct *);
+ struct napi_struct *,
+ u16 queue_id);
struct dpaa2_eth_fq_stats stats;
};

--
2.7.4


2017-12-21 12:33:25

by Ioana Ciocoi Radulescu

[permalink] [raw]
Subject: [PATCH 2/2] staging: fsl-dpaa2/eth: Flow affinity for non-forwarded traffic

The previous patch ensures Tx flow affinity for forwarded frames,
but for termination (i.e. non-forwarded) traffic the Tx queue is
still selected based on the skb hash, which is expected to hit only
a few Tx queues when the number of flows is small.

Instead, use XPS (transmit packet steering) to set netdevice queue
affinity to the sending core.

Signed-off-by: Bogdan Purcareata <[email protected]>
Signed-off-by: Ioana Radulescu <[email protected]>
---
drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.c b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.c
index b63ae09..824c4ad 100644
--- a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.c
+++ b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.c
@@ -1620,9 +1620,10 @@ static struct dpaa2_eth_channel *get_affine_channel(struct dpaa2_eth_priv *priv,
static void set_fq_affinity(struct dpaa2_eth_priv *priv)
{
struct device *dev = priv->net_dev->dev.parent;
+ struct cpumask xps_mask;
struct dpaa2_eth_fq *fq;
int rx_cpu, txc_cpu;
- int i;
+ int i, err;

/* For each FQ, pick one channel/CPU to deliver frames to.
* This may well change at runtime, either through irqbalance or
@@ -1641,6 +1642,17 @@ static void set_fq_affinity(struct dpaa2_eth_priv *priv)
break;
case DPAA2_TX_CONF_FQ:
fq->target_cpu = txc_cpu;
+
+ /* Tell the stack to affine to txc_cpu the Tx queue
+ * associated with the confirmation one
+ */
+ cpumask_clear(&xps_mask);
+ cpumask_set_cpu(txc_cpu, &xps_mask);
+ err = netif_set_xps_queue(priv->net_dev, &xps_mask,
+ fq->flowid);
+ if (err)
+ dev_err(dev, "Error setting XPS queue\n");
+
txc_cpu = cpumask_next(txc_cpu, &priv->dpio_cpumask);
if (txc_cpu >= nr_cpu_ids)
txc_cpu = cpumask_first(&priv->dpio_cpumask);
--
2.7.4