This is a series of RFC patches, posted for comments and additional proposals.
Virtio-net supports "hardware" RSS with toeplitz key.
It also allows receiving the calculated hash in the vheader,
which may be used with RPS.
Added ethtool callbacks to manipulate RSS.
Technically, hash calculation may be set only for
SRC+DST and SRC+DST+PORTSRC+PORTDST hashflows.
Completely disabling hash calculation for TCP or UDP
would also disable hash calculation for IP.
RSS/RXHASH is disabled by default.
Changes since rfc:
* code refactored
* patches reformatted
* added feature validation
Andrew Melnychenko (4):
drivers/net/virtio_net: Fixed vheader to use v1.
drivers/net/virtio_net: Changed mergeable buffer length calculation.
drivers/net/virtio_net: Added basic RSS support.
drivers/net/virtio_net: Added RSS hash report control.
drivers/net/virtio_net.c | 405 +++++++++++++++++++++++++++++++++++++--
1 file changed, 390 insertions(+), 15 deletions(-)
--
2.33.1
The header v1 provides additional info about RSS.
Added changes to compute the proper header length.
In the next patches, the header may contain RSS hash info
for the hash population.
Signed-off-by: Andrew Melnychenko <[email protected]>
---
drivers/net/virtio_net.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4ad25a8b0870..b72b21ac8ebd 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -240,13 +240,13 @@ struct virtnet_info {
};
struct padded_vnet_hdr {
- struct virtio_net_hdr_mrg_rxbuf hdr;
+ struct virtio_net_hdr_v1_hash hdr;
/*
* hdr is in a separate sg buffer, and data sg buffer shares same page
* with this header sg. This padding makes next sg 16 byte aligned
* after the header.
*/
- char padding[4];
+ char padding[12];
};
static bool is_xdp_frame(void *ptr)
@@ -1636,7 +1636,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
struct virtnet_info *vi = sq->vq->vdev->priv;
int num_sg;
- unsigned hdr_len = vi->hdr_len;
+ unsigned int hdr_len = vi->hdr_len;
bool can_push;
pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
--
2.33.1
Now the minimal virtual header length may include the entire v1 header
if the hash report is populated.
Signed-off-by: Andrew Melnychenko <[email protected]>
---
drivers/net/virtio_net.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b72b21ac8ebd..abca2e93355d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -393,7 +393,9 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
hdr_p = p;
hdr_len = vi->hdr_len;
- if (vi->mergeable_rx_bufs)
+ if (vi->has_rss_hash_report)
+ hdr_padded_len = sizeof(struct virtio_net_hdr_v1_hash);
+ else if (vi->mergeable_rx_bufs)
hdr_padded_len = sizeof(*hdr);
else
hdr_padded_len = sizeof(struct padded_vnet_hdr);
@@ -1252,7 +1254,7 @@ static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
struct ewma_pkt_len *avg_pkt_len,
unsigned int room)
{
- const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+ const size_t hdr_len = ((struct virtnet_info *)(rq->vq->vdev->priv))->hdr_len;
unsigned int len;
if (room)
@@ -2817,7 +2819,7 @@ static void virtnet_del_vqs(struct virtnet_info *vi)
*/
static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
{
- const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+ const unsigned int hdr_len = vi->hdr_len;
unsigned int rq_size = virtqueue_get_vring_size(vq);
unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
--
2.33.1
Added features for RSS and RSS hash report.
Added initialization, RXHASH feature and ethtool ops.
By default RSS/RXHASH is disabled.
Added ethtool ops to set key and indirection table.
Signed-off-by: Andrew Melnychenko <[email protected]>
---
drivers/net/virtio_net.c | 232 +++++++++++++++++++++++++++++++++++++--
1 file changed, 223 insertions(+), 9 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index abca2e93355d..cff7340f40bb 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -167,6 +167,28 @@ struct receive_queue {
struct xdp_rxq_info xdp_rxq;
};
+/* This structure can contain rss message with maximum settings for indirection table and keysize
+ * Note, that default structure that describes RSS configuration virtio_net_rss_config
+ * contains same info but can't handle table values.
+ * In any case, structure would be passed to virtio hw through sg_buf split by parts
+ * because table sizes may be differ according to the device configuration.
+ */
+#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
+#define VIRTIO_NET_RSS_MAX_TABLE_LEN 128
+struct virtio_net_ctrl_rss {
+ struct {
+ __le32 hash_types;
+ __le16 indirection_table_mask;
+ __le16 unclassified_queue;
+ } __packed table_info;
+ u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN];
+ struct {
+ u16 max_tx_vq; /* queues */
+ u8 hash_key_length;
+ } __packed key_info;
+ u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
+};
+
/* Control VQ buffers: protected by the rtnl lock */
struct control_buf {
struct virtio_net_ctrl_hdr hdr;
@@ -176,6 +198,7 @@ struct control_buf {
u8 allmulti;
__virtio16 vid;
__virtio64 offloads;
+ struct virtio_net_ctrl_rss rss;
};
struct virtnet_info {
@@ -204,6 +227,12 @@ struct virtnet_info {
/* Host will merge rx buffers for big packets (shake it! shake it!) */
bool mergeable_rx_bufs;
+ /* Host supports rss and/or hash report */
+ bool has_rss;
+ bool has_rss_hash_report;
+ u8 rss_key_size;
+ u16 rss_indir_table_size;
+
/* Has control virtqueue */
bool has_cvq;
@@ -1119,6 +1148,8 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
struct net_device *dev = vi->dev;
struct sk_buff *skb;
struct virtio_net_hdr_mrg_rxbuf *hdr;
+ struct virtio_net_hdr_v1_hash *hdr_hash;
+ enum pkt_hash_types rss_hash_type;
if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
pr_debug("%s: short packet %i\n", dev->name, len);
@@ -1145,6 +1176,29 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
return;
hdr = skb_vnet_hdr(skb);
+ if (vi->has_rss_hash_report && (dev->features & NETIF_F_RXHASH)) {
+ hdr_hash = (struct virtio_net_hdr_v1_hash *)(hdr);
+
+ switch (hdr_hash->hash_report) {
+ case VIRTIO_NET_HASH_REPORT_TCPv4:
+ case VIRTIO_NET_HASH_REPORT_UDPv4:
+ case VIRTIO_NET_HASH_REPORT_TCPv6:
+ case VIRTIO_NET_HASH_REPORT_UDPv6:
+ case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
+ case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
+ rss_hash_type = PKT_HASH_TYPE_L4;
+ break;
+ case VIRTIO_NET_HASH_REPORT_IPv4:
+ case VIRTIO_NET_HASH_REPORT_IPv6:
+ case VIRTIO_NET_HASH_REPORT_IPv6_EX:
+ rss_hash_type = PKT_HASH_TYPE_L3;
+ break;
+ case VIRTIO_NET_HASH_REPORT_NONE:
+ default:
+ rss_hash_type = PKT_HASH_TYPE_NONE;
+ }
+ skb_set_hash(skb, hdr_hash->hash_value, rss_hash_type);
+ }
if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -2167,6 +2221,57 @@ static void virtnet_get_ringparam(struct net_device *dev,
ring->tx_pending = ring->tx_max_pending;
}
+static bool virtnet_commit_rss_command(struct virtnet_info *vi)
+{
+ struct net_device *dev = vi->dev;
+ struct scatterlist sgs[4];
+ unsigned int sg_buf_size;
+
+ /* prepare sgs */
+ sg_init_table(sgs, 4);
+
+ sg_buf_size = sizeof(vi->ctrl->rss.table_info);
+ sg_set_buf(&sgs[0], &vi->ctrl->rss.table_info, sg_buf_size);
+
+ sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size;
+ sg_set_buf(&sgs[1], vi->ctrl->rss.indirection_table, sg_buf_size);
+
+ sg_buf_size = sizeof(vi->ctrl->rss.key_info);
+ sg_set_buf(&sgs[2], &vi->ctrl->rss.key_info, sg_buf_size);
+
+ sg_buf_size = vi->rss_key_size;
+ sg_set_buf(&sgs[3], vi->ctrl->rss.key, sg_buf_size);
+
+ if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
+ vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG
+ : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) {
+ dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n");
+ return false;
+ }
+ return true;
+}
+
+static void virtnet_init_default_rss(struct virtnet_info *vi)
+{
+ u32 indir_val = 0;
+ int i = 0;
+
+ vi->ctrl->rss.table_info.hash_types = vi->rss_hash_types_supported;
+ vi->rss_hash_types_saved = vi->rss_hash_types_supported;
+ vi->ctrl->rss.table_info.indirection_table_mask = vi->rss_indir_table_size - 1;
+ vi->ctrl->rss.table_info.unclassified_queue = 0;
+
+ for (; i < vi->rss_indir_table_size; ++i) {
+ indir_val = ethtool_rxfh_indir_default(i, vi->max_queue_pairs);
+ vi->ctrl->rss.indirection_table[i] = indir_val;
+ }
+
+ vi->ctrl->rss.key_info.max_tx_vq = vi->curr_queue_pairs;
+ vi->ctrl->rss.key_info.hash_key_length = vi->rss_key_size;
+
+ netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size);
+}
+
static void virtnet_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
@@ -2395,6 +2500,71 @@ static void virtnet_update_settings(struct virtnet_info *vi)
vi->duplex = duplex;
}
+static u32 virtnet_get_rxfh_key_size(struct net_device *dev)
+{
+ return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size;
+}
+
+static u32 virtnet_get_rxfh_indir_size(struct net_device *dev)
+{
+ return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size;
+}
+
+static int virtnet_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i;
+
+ if (indir) {
+ for (i = 0; i < vi->rss_indir_table_size; ++i)
+ indir[i] = vi->ctrl->rss.indirection_table[i];
+ }
+
+ if (key)
+ memcpy(key, vi->ctrl->rss.key, vi->rss_key_size);
+
+ if (hfunc)
+ *hfunc = ETH_RSS_HASH_TOP;
+
+ return 0;
+}
+
+static int virtnet_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i;
+
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+ return -EOPNOTSUPP;
+
+ if (indir) {
+ for (i = 0; i < vi->rss_indir_table_size; ++i)
+ vi->ctrl->rss.indirection_table[i] = indir[i];
+ }
+ if (key)
+ memcpy(vi->ctrl->rss.key, key, vi->rss_key_size);
+
+ virtnet_commit_rss_command(vi);
+
+ return 0;
+}
+
+static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ int rc = 0;
+
+ switch (info->cmd) {
+ case ETHTOOL_GRXRINGS:
+ info->data = vi->curr_queue_pairs;
+ break;
+ default:
+ rc = -EOPNOTSUPP;
+ }
+
+ return rc;
+}
+
static const struct ethtool_ops virtnet_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
.get_drvinfo = virtnet_get_drvinfo,
@@ -2410,6 +2580,11 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
.set_link_ksettings = virtnet_set_link_ksettings,
.set_coalesce = virtnet_set_coalesce,
.get_coalesce = virtnet_get_coalesce,
+ .get_rxfh_key_size = virtnet_get_rxfh_key_size,
+ .get_rxfh_indir_size = virtnet_get_rxfh_indir_size,
+ .get_rxfh = virtnet_get_rxfh,
+ .set_rxfh = virtnet_set_rxfh,
+ .get_rxnfc = virtnet_get_rxnfc,
};
static void virtnet_freeze_down(struct virtio_device *vdev)
@@ -3040,7 +3215,10 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
- "VIRTIO_NET_F_CTRL_VQ"))) {
+ "VIRTIO_NET_F_CTRL_VQ") ||
+ VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, "VIRTIO_NET_F_RSS") ||
+ VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
+ "VIRTIO_NET_F_HASH_REPORT"))) {
return false;
}
@@ -3080,13 +3258,14 @@ static int virtnet_probe(struct virtio_device *vdev)
u16 max_queue_pairs;
int mtu;
- /* Find if host supports multiqueue virtio_net device */
- err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
- struct virtio_net_config,
- max_virtqueue_pairs, &max_queue_pairs);
+ /* Find if host supports multiqueue/rss virtio_net device */
+ max_queue_pairs = 0;
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
+ max_queue_pairs =
+ virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs));
/* We need at least 2 queue's */
- if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
+ if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
max_queue_pairs = 1;
@@ -3170,8 +3349,36 @@ static int virtnet_probe(struct virtio_device *vdev)
if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
vi->mergeable_rx_bufs = true;
- if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
- virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
+ vi->has_rss_hash_report = true;
+ vi->rss_indir_table_size = 1;
+ vi->rss_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
+ }
+
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
+ vi->has_rss = true;
+ vi->rss_indir_table_size =
+ virtio_cread16(vdev, offsetof(struct virtio_net_config,
+ rss_max_indirection_table_length));
+ vi->rss_key_size =
+ virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
+ }
+
+ if (vi->has_rss || vi->has_rss_hash_report) {
+ vi->rss_hash_types_supported =
+ virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types));
+ vi->rss_hash_types_supported &=
+ ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX |
+ VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
+ VIRTIO_NET_RSS_HASH_TYPE_UDP_EX);
+
+ dev->hw_features |= NETIF_F_RXHASH;
+ }
+
+ if (vi->has_rss_hash_report)
+ vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
+ else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
+ virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
else
vi->hdr_len = sizeof(struct virtio_net_hdr);
@@ -3238,6 +3445,12 @@ static int virtnet_probe(struct virtio_device *vdev)
}
}
+ if (vi->has_rss || vi->has_rss_hash_report) {
+ rtnl_lock();
+ virtnet_init_default_rss(vi);
+ rtnl_unlock();
+ }
+
err = register_netdev(dev);
if (err) {
pr_debug("virtio_net: registering device failed\n");
@@ -3369,7 +3582,8 @@ static struct virtio_device_id id_table[] = {
VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
VIRTIO_NET_F_CTRL_MAC_ADDR, \
VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
- VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY
+ VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
+ VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT
static unsigned int features[] = {
VIRTNET_FEATURES,
--
2.33.1
Added set_hash for skb.
Also added hashflow set/get callbacks.
Virtio RSS "IPv6 extensions" hashes disabled.
Also, disabling RXH_IP_SRC/DST for TCP would disable them for UDP.
TCP and UDP support only:
ethtool -U eth0 rx-flow-hash tcp4 sd
RXH_IP_SRC + RXH_IP_DST
ethtool -U eth0 rx-flow-hash tcp4 sdfn
RXH_IP_SRC + RXH_IP_DST + RXH_L4_B_0_1 + RXH_L4_B_2_3
Signed-off-by: Andrew Melnychenko <[email protected]>
---
drivers/net/virtio_net.c | 159 +++++++++++++++++++++++++++++++++++++++
1 file changed, 159 insertions(+)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index cff7340f40bb..b1ed373d942b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -232,6 +232,8 @@ struct virtnet_info {
bool has_rss_hash_report;
u8 rss_key_size;
u16 rss_indir_table_size;
+ u32 rss_hash_types_supported;
+ u32 rss_hash_types_saved;
/* Has control virtqueue */
bool has_cvq;
@@ -2272,6 +2274,131 @@ static void virtnet_init_default_rss(struct virtnet_info *vi)
netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size);
}
+static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info)
+{
+ info->data = 0;
+ switch (info->flow_type) {
+ case TCP_V4_FLOW:
+ if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
+ info->data = RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
+ info->data = RXH_IP_SRC | RXH_IP_DST;
+ }
+ break;
+ case TCP_V6_FLOW:
+ if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
+ info->data = RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
+ info->data = RXH_IP_SRC | RXH_IP_DST;
+ }
+ break;
+ case UDP_V4_FLOW:
+ if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
+ info->data = RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
+ info->data = RXH_IP_SRC | RXH_IP_DST;
+ }
+ break;
+ case UDP_V6_FLOW:
+ if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
+ info->data = RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
+ info->data = RXH_IP_SRC | RXH_IP_DST;
+ }
+ break;
+ case IPV4_FLOW:
+ if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4)
+ info->data = RXH_IP_SRC | RXH_IP_DST;
+
+ break;
+ case IPV6_FLOW:
+ if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4)
+ info->data = RXH_IP_SRC | RXH_IP_DST;
+
+ break;
+ default:
+ info->data = 0;
+ break;
+ }
+}
+
+static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info)
+{
+ u64 is_iphash = info->data & (RXH_IP_SRC | RXH_IP_DST);
+ u64 is_porthash = info->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3);
+ u32 new_hashtypes = vi->rss_hash_types_saved;
+
+ if ((is_iphash && (is_iphash != (RXH_IP_SRC | RXH_IP_DST))) ||
+ (is_porthash && (is_porthash != (RXH_L4_B_0_1 | RXH_L4_B_2_3)))) {
+ return false;
+ }
+
+ if (!is_iphash && is_porthash)
+ return false;
+
+ switch (info->flow_type) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ case IPV4_FLOW:
+ new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4;
+ if (is_iphash)
+ new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4;
+
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ case IPV6_FLOW:
+ new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6;
+ if (is_iphash)
+ new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6;
+
+ break;
+ default:
+ break;
+ }
+
+ switch (info->flow_type) {
+ case TCP_V4_FLOW:
+ new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_TCPv4;
+ if (is_porthash)
+ new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_TCPv4;
+
+ break;
+ case UDP_V4_FLOW:
+ new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_UDPv4;
+ if (is_porthash)
+ new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_UDPv4;
+
+ break;
+ case TCP_V6_FLOW:
+ new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
+ if (is_porthash)
+ new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
+
+ break;
+ case UDP_V6_FLOW:
+ new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
+ if (is_porthash)
+ new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
+
+ break;
+ default:
+ break;
+ }
+
+ if (new_hashtypes != vi->rss_hash_types_saved) {
+ vi->rss_hash_types_saved = new_hashtypes;
+ vi->ctrl->rss.table_info.hash_types = vi->rss_hash_types_saved;
+ if (vi->dev->features & NETIF_F_RXHASH)
+ return virtnet_commit_rss_command(vi);
+ }
+
+ return true;
+}
static void virtnet_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
@@ -2557,6 +2684,27 @@ static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
switch (info->cmd) {
case ETHTOOL_GRXRINGS:
info->data = vi->curr_queue_pairs;
+ break;
+ case ETHTOOL_GRXFH:
+ virtnet_get_hashflow(vi, info);
+ break;
+ default:
+ rc = -EOPNOTSUPP;
+ }
+
+ return rc;
+}
+
+static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ int rc = 0;
+
+ switch (info->cmd) {
+ case ETHTOOL_SRXFH:
+ if (!virtnet_set_hashflow(vi, info))
+ rc = -EINVAL;
+
break;
default:
rc = -EOPNOTSUPP;
@@ -2585,6 +2733,7 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
.get_rxfh = virtnet_get_rxfh,
.set_rxfh = virtnet_set_rxfh,
.get_rxnfc = virtnet_get_rxnfc,
+ .set_rxnfc = virtnet_set_rxnfc,
};
static void virtnet_freeze_down(struct virtio_device *vdev)
@@ -2837,6 +2986,16 @@ static int virtnet_set_features(struct net_device *dev,
vi->guest_offloads = offloads;
}
+ if ((dev->features ^ features) & NETIF_F_RXHASH) {
+ if (features & NETIF_F_RXHASH)
+ vi->ctrl->rss.table_info.hash_types = vi->rss_hash_types_saved;
+ else
+ vi->ctrl->rss.table_info.hash_types = 0;
+
+ if (!virtnet_commit_rss_command(vi))
+ return -EINVAL;
+ }
+
return 0;
}
--
2.33.1
On Sun, Oct 31, 2021 at 1:00 AM Andrew Melnychenko <[email protected]> wrote:
>
> Added features for RSS and RSS hash report.
> Added initialization, RXHASH feature and ethtool ops.
> By default RSS/RXHASH is disabled.
> Added ethtools ops to set key and indirection table.
>
> Signed-off-by: Andrew Melnychenko <[email protected]>
> ---
> drivers/net/virtio_net.c | 232 +++++++++++++++++++++++++++++++++++++--
> 1 file changed, 223 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index abca2e93355d..cff7340f40bb 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -167,6 +167,28 @@ struct receive_queue {
> struct xdp_rxq_info xdp_rxq;
> };
>
> +/* This structure can contain rss message with maximum settings for indirection table and keysize
> + * Note, that default structure that describes RSS configuration virtio_net_rss_config
> + * contains same info but can't handle table values.
> + * In any case, structure would be passed to virtio hw through sg_buf split by parts
> + * because table sizes may be differ according to the device configuration.
> + */
> +#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
Unless there is a technical reason, this probably should be no shorter
than TOEPLITZ_KEY_LEN
> +#define VIRTIO_NET_RSS_MAX_TABLE_LEN 128
> +struct virtio_net_ctrl_rss {
> + struct {
> + __le32 hash_types;
> + __le16 indirection_table_mask;
> + __le16 unclassified_queue;
Is this explicit variable needed?
> + } __packed table_info;
> + u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN];
> + struct {
> + u16 max_tx_vq; /* queues */
> + u8 hash_key_length;
> + } __packed key_info;
> + u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
> +};
> +
> /* Control VQ buffers: protected by the rtnl lock */
> struct control_buf {
> struct virtio_net_ctrl_hdr hdr;
> @@ -176,6 +198,7 @@ struct control_buf {
> u8 allmulti;
> __virtio16 vid;
> __virtio64 offloads;
> + struct virtio_net_ctrl_rss rss;
> };
>
> struct virtnet_info {
> @@ -204,6 +227,12 @@ struct virtnet_info {
> /* Host will merge rx buffers for big packets (shake it! shake it!) */
> bool mergeable_rx_bufs;
>
> + /* Host supports rss and/or hash report */
> + bool has_rss;
Superfluous, can be derived from non-zero rss_key_size?
> + bool has_rss_hash_report;
> + u8 rss_key_size;
> + u16 rss_indir_table_size;
> +
> /* Has control virtqueue */
> bool has_cvq;
>
> @@ -1119,6 +1148,8 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
> struct net_device *dev = vi->dev;
> struct sk_buff *skb;
> struct virtio_net_hdr_mrg_rxbuf *hdr;
> + struct virtio_net_hdr_v1_hash *hdr_hash;
> + enum pkt_hash_types rss_hash_type;
>
> if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
> pr_debug("%s: short packet %i\n", dev->name, len);
> @@ -1145,6 +1176,29 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
> return;
>
> hdr = skb_vnet_hdr(skb);
> + if (vi->has_rss_hash_report && (dev->features & NETIF_F_RXHASH)) {
Only the second test is needed? It should be impossible to configure
the feature unless the device advertises has_rss_hash_report
> + hdr_hash = (struct virtio_net_hdr_v1_hash *)(hdr);
> +
> + switch (hdr_hash->hash_report) {
> + case VIRTIO_NET_HASH_REPORT_TCPv4:
> + case VIRTIO_NET_HASH_REPORT_UDPv4:
> + case VIRTIO_NET_HASH_REPORT_TCPv6:
> + case VIRTIO_NET_HASH_REPORT_UDPv6:
> + case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
> + case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
> + rss_hash_type = PKT_HASH_TYPE_L4;
> + break;
> + case VIRTIO_NET_HASH_REPORT_IPv4:
> + case VIRTIO_NET_HASH_REPORT_IPv6:
> + case VIRTIO_NET_HASH_REPORT_IPv6_EX:
> + rss_hash_type = PKT_HASH_TYPE_L3;
> + break;
> + case VIRTIO_NET_HASH_REPORT_NONE:
> + default:
> + rss_hash_type = PKT_HASH_TYPE_NONE;
> + }
Is this detailed protocol typing necessary? Most devices only pass a bit is_l4.
> + skb_set_hash(skb, hdr_hash->hash_value, rss_hash_type);
> + }
>
> if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
> skb->ip_summed = CHECKSUM_UNNECESSARY;
> @@ -2167,6 +2221,57 @@ static void virtnet_get_ringparam(struct net_device *dev,
> ring->tx_pending = ring->tx_max_pending;
> }
>
> +static bool virtnet_commit_rss_command(struct virtnet_info *vi)
> +{
> + struct net_device *dev = vi->dev;
> + struct scatterlist sgs[4];
> + unsigned int sg_buf_size;
> +
> + /* prepare sgs */
> + sg_init_table(sgs, 4);
> +
> + sg_buf_size = sizeof(vi->ctrl->rss.table_info);
> + sg_set_buf(&sgs[0], &vi->ctrl->rss.table_info, sg_buf_size);
> +
> + sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size;
> + sg_set_buf(&sgs[1], vi->ctrl->rss.indirection_table, sg_buf_size);
> +
> + sg_buf_size = sizeof(vi->ctrl->rss.key_info);
> + sg_set_buf(&sgs[2], &vi->ctrl->rss.key_info, sg_buf_size);
> +
> + sg_buf_size = vi->rss_key_size;
> + sg_set_buf(&sgs[3], vi->ctrl->rss.key, sg_buf_size);
> +
> + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
> + vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG
> + : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) {
> + dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n");
> + return false;
> + }
> + return true;
> +}
> +
> +static void virtnet_init_default_rss(struct virtnet_info *vi)
> +{
> + u32 indir_val = 0;
> + int i = 0;
> +
> + vi->ctrl->rss.table_info.hash_types = vi->rss_hash_types_supported;
Similar to above, and related to the next patch: is this very detailed
specification of supported hash types needed? When is this useful? It
is not customary to specify RSS to that degree.
> + vi->rss_hash_types_saved = vi->rss_hash_types_supported;
> + vi->ctrl->rss.table_info.indirection_table_mask = vi->rss_indir_table_size - 1;
> + vi->ctrl->rss.table_info.unclassified_queue = 0;
> +
> + for (; i < vi->rss_indir_table_size; ++i) {
> + indir_val = ethtool_rxfh_indir_default(i, vi->max_queue_pairs);
> + vi->ctrl->rss.indirection_table[i] = indir_val;
> + }
> +
> + vi->ctrl->rss.key_info.max_tx_vq = vi->curr_queue_pairs;
> + vi->ctrl->rss.key_info.hash_key_length = vi->rss_key_size;
> +
> + netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size);
> +}
> +
>
> static void virtnet_get_drvinfo(struct net_device *dev,
> struct ethtool_drvinfo *info)
> @@ -2395,6 +2500,71 @@ static void virtnet_update_settings(struct virtnet_info *vi)
> vi->duplex = duplex;
> }
>
> +static u32 virtnet_get_rxfh_key_size(struct net_device *dev)
> +{
> + return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size;
> +}
> +
> +static u32 virtnet_get_rxfh_indir_size(struct net_device *dev)
> +{
> + return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size;
> +}
> +
> +static int virtnet_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc)
> +{
> + struct virtnet_info *vi = netdev_priv(dev);
> + int i;
> +
> + if (indir) {
> + for (i = 0; i < vi->rss_indir_table_size; ++i)
> + indir[i] = vi->ctrl->rss.indirection_table[i];
> + }
> +
> + if (key)
> + memcpy(key, vi->ctrl->rss.key, vi->rss_key_size);
> +
> + if (hfunc)
> + *hfunc = ETH_RSS_HASH_TOP;
> +
> + return 0;
> +}
> +
> +static int virtnet_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc)
> +{
> + struct virtnet_info *vi = netdev_priv(dev);
> + int i;
> +
> + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
> + return -EOPNOTSUPP;
> +
> + if (indir) {
> + for (i = 0; i < vi->rss_indir_table_size; ++i)
> + vi->ctrl->rss.indirection_table[i] = indir[i];
> + }
> + if (key)
> + memcpy(vi->ctrl->rss.key, key, vi->rss_key_size);
> +
> + virtnet_commit_rss_command(vi);
> +
> + return 0;
> +}
> +
> +static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
> +{
> + struct virtnet_info *vi = netdev_priv(dev);
> + int rc = 0;
> +
> + switch (info->cmd) {
> + case ETHTOOL_GRXRINGS:
> + info->data = vi->curr_queue_pairs;
> + break;
> + default:
> + rc = -EOPNOTSUPP;
> + }
> +
> + return rc;
> +}
> +
> static const struct ethtool_ops virtnet_ethtool_ops = {
> .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
> .get_drvinfo = virtnet_get_drvinfo,
> @@ -2410,6 +2580,11 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
> .set_link_ksettings = virtnet_set_link_ksettings,
> .set_coalesce = virtnet_set_coalesce,
> .get_coalesce = virtnet_get_coalesce,
> + .get_rxfh_key_size = virtnet_get_rxfh_key_size,
> + .get_rxfh_indir_size = virtnet_get_rxfh_indir_size,
> + .get_rxfh = virtnet_get_rxfh,
> + .set_rxfh = virtnet_set_rxfh,
> + .get_rxnfc = virtnet_get_rxnfc,
> };
>
> static void virtnet_freeze_down(struct virtio_device *vdev)
> @@ -3040,7 +3215,10 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
> "VIRTIO_NET_F_CTRL_VQ") ||
> VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
> VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
> - "VIRTIO_NET_F_CTRL_VQ"))) {
> + "VIRTIO_NET_F_CTRL_VQ") ||
> + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, "VIRTIO_NET_F_RSS") ||
> + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
> + "VIRTIO_NET_F_HASH_REPORT"))) {
> return false;
> }
>
> @@ -3080,13 +3258,14 @@ static int virtnet_probe(struct virtio_device *vdev)
> u16 max_queue_pairs;
> int mtu;
>
> - /* Find if host supports multiqueue virtio_net device */
> - err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
> - struct virtio_net_config,
> - max_virtqueue_pairs, &max_queue_pairs);
> + /* Find if host supports multiqueue/rss virtio_net device */
> + max_queue_pairs = 0;
> + if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
Is VIRTIO_NET_F_RSS implied by VIRTIO_NET_F_MQ?
> + max_queue_pairs =
> + virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs));
>
> /* We need at least 2 queue's */
> - if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
> + if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
> max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
> !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
> max_queue_pairs = 1;
> @@ -3170,8 +3349,36 @@ static int virtnet_probe(struct virtio_device *vdev)
> if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
> vi->mergeable_rx_bufs = true;
>
> - if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
> - virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
> + if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
> + vi->has_rss_hash_report = true;
> + vi->rss_indir_table_size = 1;
> + vi->rss_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
> + }
> +
> + if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
> + vi->has_rss = true;
> + vi->rss_indir_table_size =
> + virtio_cread16(vdev, offsetof(struct virtio_net_config,
> + rss_max_indirection_table_length));
> + vi->rss_key_size =
> + virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
> + }
Please split adding the two features, hash report and rss, into two
separate patches.
> > + hdr_hash = (struct virtio_net_hdr_v1_hash *)(hdr);
> > +
> > + switch (hdr_hash->hash_report) {
> > + case VIRTIO_NET_HASH_REPORT_TCPv4:
> > + case VIRTIO_NET_HASH_REPORT_UDPv4:
> > + case VIRTIO_NET_HASH_REPORT_TCPv6:
> > + case VIRTIO_NET_HASH_REPORT_UDPv6:
> > + case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
> > + case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
> > + rss_hash_type = PKT_HASH_TYPE_L4;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_IPv4:
> > + case VIRTIO_NET_HASH_REPORT_IPv6:
> > + case VIRTIO_NET_HASH_REPORT_IPv6_EX:
> > + rss_hash_type = PKT_HASH_TYPE_L3;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_NONE:
> > + default:
> > + rss_hash_type = PKT_HASH_TYPE_NONE;
> > + }
>
> Is this detailed protocol typing necessary? Most devices only pass a bit is_l4.
> > +static void virtnet_init_default_rss(struct virtnet_info *vi)
> > +{
> > + u32 indir_val = 0;
> > + int i = 0;
> > +
> > + vi->ctrl->rss.table_info.hash_types = vi->rss_hash_types_supported;
>
> Similar to above, and related to the next patch: is this very detailed
> specification of supported hash types needed? When is this useful? It
> is not customary to specify RSS to that degree.
My bad. This is also implemented by bnxt, for one. I was unaware of
this feature.
On Sun, Oct 31, 2021 at 06:59:56AM +0200, Andrew Melnychenko wrote:
> The header v1 provides additional info about RSS.
> Added changes to computing proper header length.
> In the next patches, the header may contain RSS hash info
> for the hash population.
>
> Signed-off-by: Andrew Melnychenko <[email protected]>
> ---
> drivers/net/virtio_net.c | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 4ad25a8b0870..b72b21ac8ebd 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -240,13 +240,13 @@ struct virtnet_info {
> };
>
> struct padded_vnet_hdr {
> - struct virtio_net_hdr_mrg_rxbuf hdr;
> + struct virtio_net_hdr_v1_hash hdr;
> /*
> * hdr is in a separate sg buffer, and data sg buffer shares same page
> * with this header sg. This padding makes next sg 16 byte aligned
> * after the header.
> */
> - char padding[4];
> + char padding[12];
> };
>
> static bool is_xdp_frame(void *ptr)
This is not helpful as a separate patch; it just reserves extra space.
Better to squash it with the patches that make use of the change.
> @@ -1636,7 +1636,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
> const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
> struct virtnet_info *vi = sq->vq->vdev->priv;
> int num_sg;
> - unsigned hdr_len = vi->hdr_len;
> + unsigned int hdr_len = vi->hdr_len;
> bool can_push;
If we want this, please make it a separate patch.
>
> pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
> --
> 2.33.1
On Sun, Oct 31, 2021 at 06:59:57AM +0200, Andrew Melnychenko wrote:
> Now minimal virtual header length is may include the entire v1 header
> if the hash report were populated.
>
> Signed-off-by: Andrew Melnychenko <[email protected]>
The subject isn't really descriptive. Changed it how?
And I couldn't really decipher what this log entry means either.
> ---
> drivers/net/virtio_net.c | 8 +++++---
> 1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index b72b21ac8ebd..abca2e93355d 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -393,7 +393,9 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> hdr_p = p;
>
> hdr_len = vi->hdr_len;
> - if (vi->mergeable_rx_bufs)
> + if (vi->has_rss_hash_report)
> + hdr_padded_len = sizeof(struct virtio_net_hdr_v1_hash);
> + else if (vi->mergeable_rx_bufs)
> hdr_padded_len = sizeof(*hdr);
> else
> hdr_padded_len = sizeof(struct padded_vnet_hdr);
> @@ -1252,7 +1254,7 @@ static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
> struct ewma_pkt_len *avg_pkt_len,
> unsigned int room)
> {
> - const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> + const size_t hdr_len = ((struct virtnet_info *)(rq->vq->vdev->priv))->hdr_len;
> unsigned int len;
>
> if (room)
Is this pointer chasing the best we can do?
> @@ -2817,7 +2819,7 @@ static void virtnet_del_vqs(struct virtnet_info *vi)
> */
> static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
> {
> - const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> + const unsigned int hdr_len = vi->hdr_len;
> unsigned int rq_size = virtqueue_get_vring_size(vq);
> unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
> unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
> --
> 2.33.1
On Mon, Nov 1, 2021 at 10:44 AM Michael S. Tsirkin <[email protected]> wrote:
>
> On Sun, Oct 31, 2021 at 06:59:57AM +0200, Andrew Melnychenko wrote:
> > Now minimal virtual header length is may include the entire v1 header
> > if the hash report were populated.
> >
> > Signed-off-by: Andrew Melnychenko <[email protected]>
>
> subject isn't really descriptive. changed it how?
>
> And I couldn't really decypher what this log entry means either.
>
I'll change it in the next patch.
So, I've tried to ensure that the v1 header with the hash report will
be available if required in new patches.
> > ---
> > drivers/net/virtio_net.c | 8 +++++---
> > 1 file changed, 5 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index b72b21ac8ebd..abca2e93355d 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -393,7 +393,9 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > hdr_p = p;
> >
> > hdr_len = vi->hdr_len;
> > - if (vi->mergeable_rx_bufs)
> > + if (vi->has_rss_hash_report)
> > + hdr_padded_len = sizeof(struct virtio_net_hdr_v1_hash);
> > + else if (vi->mergeable_rx_bufs)
> > hdr_padded_len = sizeof(*hdr);
> > else
> > hdr_padded_len = sizeof(struct padded_vnet_hdr);
> > @@ -1252,7 +1254,7 @@ static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
> > struct ewma_pkt_len *avg_pkt_len,
> > unsigned int room)
> > {
> > - const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > + const size_t hdr_len = ((struct virtnet_info *)(rq->vq->vdev->priv))->hdr_len;
> > unsigned int len;
> >
> > if (room)
>
> Is this pointer chasing the best we can do?
I'll change that.
>
> > @@ -2817,7 +2819,7 @@ static void virtnet_del_vqs(struct virtnet_info *vi)
> > */
> > static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
> > {
> > - const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > + const unsigned int hdr_len = vi->hdr_len;
> > unsigned int rq_size = virtqueue_get_vring_size(vq);
> > unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
> > unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
> > --
> > 2.33.1
>
On Mon, Nov 1, 2021 at 10:40 AM Michael S. Tsirkin <[email protected]> wrote:
>
> On Sun, Oct 31, 2021 at 06:59:56AM +0200, Andrew Melnychenko wrote:
> > The header v1 provides additional info about RSS.
> > Added changes to computing proper header length.
> > In the next patches, the header may contain RSS hash info
> > for the hash population.
> >
> > Signed-off-by: Andrew Melnychenko <[email protected]>
> > ---
> > drivers/net/virtio_net.c | 6 +++---
> > 1 file changed, 3 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 4ad25a8b0870..b72b21ac8ebd 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -240,13 +240,13 @@ struct virtnet_info {
> > };
> >
> > struct padded_vnet_hdr {
> > - struct virtio_net_hdr_mrg_rxbuf hdr;
> > + struct virtio_net_hdr_v1_hash hdr;
> > /*
> > * hdr is in a separate sg buffer, and data sg buffer shares same page
> > * with this header sg. This padding makes next sg 16 byte aligned
> > * after the header.
> > */
> > - char padding[4];
> > + char padding[12];
> > };
> >
> > static bool is_xdp_frame(void *ptr)
>
>
> This is not helpful as a separate patch, just reserving extra space.
> better squash with the patches making use of the change.
Ok.
>
> > @@ -1636,7 +1636,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
> > const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
> > struct virtnet_info *vi = sq->vq->vdev->priv;
> > int num_sg;
> > - unsigned hdr_len = vi->hdr_len;
> > + unsigned int hdr_len = vi->hdr_len;
> > bool can_push;
>
>
> if we want this, pls make it a separate patch.
Ok. I've added that change after checkpatch warnings. Technically,
checkpatch should not fail on the patch without that line.
>
>
> >
> > pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
> > --
> > 2.33.1
>
On Sun, Oct 31, 2021 at 5:33 PM Willem de Bruijn
<[email protected]> wrote:
>
> On Sun, Oct 31, 2021 at 1:00 AM Andrew Melnychenko <[email protected]> wrote:
> >
> > Added features for RSS and RSS hash report.
> > Added initialization, RXHASH feature and ethtool ops.
> > By default RSS/RXHASH is disabled.
> > Added ethtools ops to set key and indirection table.
> >
> > Signed-off-by: Andrew Melnychenko <[email protected]>
> > ---
> > drivers/net/virtio_net.c | 232 +++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 223 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index abca2e93355d..cff7340f40bb 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -167,6 +167,28 @@ struct receive_queue {
> > struct xdp_rxq_info xdp_rxq;
> > };
> >
> > +/* This structure can contain rss message with maximum settings for indirection table and keysize
> > + * Note, that default structure that describes RSS configuration virtio_net_rss_config
> > + * contains same info but can't handle table values.
> > + * In any case, structure would be passed to virtio hw through sg_buf split by parts
> > + * because table sizes may be differ according to the device configuration.
> > + */
> > +#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
>
> Unless there is a technical reason, this probably should be no shorter
> than TOEPLITZ_KEY_LEN
Well yeah, technically if the device requests a shorter key, we
may still provide it.
I think we may check and 'disable' RSS if some configurations are inadequate.
>
> > +#define VIRTIO_NET_RSS_MAX_TABLE_LEN 128
> > +struct virtio_net_ctrl_rss {
> > + struct {
> > + __le32 hash_types;
> > + __le16 indirection_table_mask;
> > + __le16 unclassified_queue;
>
> Is this explicit variable needed?
Yes, it's a part of the message to be sent to the device.
>
> > + } __packed table_info;
> > + u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN];
> > + struct {
> > + u16 max_tx_vq; /* queues */
> > + u8 hash_key_length;
> > + } __packed key_info;
> > + u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
> > +};
> > +
> > /* Control VQ buffers: protected by the rtnl lock */
> > struct control_buf {
> > struct virtio_net_ctrl_hdr hdr;
> > @@ -176,6 +198,7 @@ struct control_buf {
> > u8 allmulti;
> > __virtio16 vid;
> > __virtio64 offloads;
> > + struct virtio_net_ctrl_rss rss;
> > };
> >
> > struct virtnet_info {
> > @@ -204,6 +227,12 @@ struct virtnet_info {
> > /* Host will merge rx buffers for big packets (shake it! shake it!) */
> > bool mergeable_rx_bufs;
> >
> > + /* Host supports rss and/or hash report */
> > + bool has_rss;
>
> Superfluous, can be derived form non-zero rss_key_size?
I think that the explicit 'has_rss' field is better. "non-zero
rss_key_size" should work, I'll change RSS derivation in the next
patches.
>
> > + bool has_rss_hash_report;
> > + u8 rss_key_size;
> > + u16 rss_indir_table_size;
> > +
> > /* Has control virtqueue */
> > bool has_cvq;
> >
> > @@ -1119,6 +1148,8 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
> > struct net_device *dev = vi->dev;
> > struct sk_buff *skb;
> > struct virtio_net_hdr_mrg_rxbuf *hdr;
> > + struct virtio_net_hdr_v1_hash *hdr_hash;
> > + enum pkt_hash_types rss_hash_type;
> >
> > if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
> > pr_debug("%s: short packet %i\n", dev->name, len);
> > @@ -1145,6 +1176,29 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
> > return;
> >
> > hdr = skb_vnet_hdr(skb);
> > + if (vi->has_rss_hash_report && (dev->features & NETIF_F_RXHASH)) {
>
> Only the second test is needed? It should be impossible to configure
> the feature unless the device advertises has_rss_hash_report
Well, you can have RSS without hash population. So we need to check
whether the device supports hash population and rxhash is enabled (e.g.
through ethtool).
>
> > + hdr_hash = (struct virtio_net_hdr_v1_hash *)(hdr);
> > +
> > + switch (hdr_hash->hash_report) {
> > + case VIRTIO_NET_HASH_REPORT_TCPv4:
> > + case VIRTIO_NET_HASH_REPORT_UDPv4:
> > + case VIRTIO_NET_HASH_REPORT_TCPv6:
> > + case VIRTIO_NET_HASH_REPORT_UDPv6:
> > + case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
> > + case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
> > + rss_hash_type = PKT_HASH_TYPE_L4;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_IPv4:
> > + case VIRTIO_NET_HASH_REPORT_IPv6:
> > + case VIRTIO_NET_HASH_REPORT_IPv6_EX:
> > + rss_hash_type = PKT_HASH_TYPE_L3;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_NONE:
> > + default:
> > + rss_hash_type = PKT_HASH_TYPE_NONE;
> > + }
>
> Is this detailed protocol typing necessary? Most devices only pass a bit is_l4.
Yes, in theory, there is real hardware that may support only L3 hash
calculations.
>
> > + skb_set_hash(skb, hdr_hash->hash_value, rss_hash_type);
> > + }
> >
> > if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
> > skb->ip_summed = CHECKSUM_UNNECESSARY;
> > @@ -2167,6 +2221,57 @@ static void virtnet_get_ringparam(struct net_device *dev,
> > ring->tx_pending = ring->tx_max_pending;
> > }
> >
> > +static bool virtnet_commit_rss_command(struct virtnet_info *vi)
> > +{
> > + struct net_device *dev = vi->dev;
> > + struct scatterlist sgs[4];
> > + unsigned int sg_buf_size;
> > +
> > + /* prepare sgs */
> > + sg_init_table(sgs, 4);
> > +
> > + sg_buf_size = sizeof(vi->ctrl->rss.table_info);
> > + sg_set_buf(&sgs[0], &vi->ctrl->rss.table_info, sg_buf_size);
> > +
> > + sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size;
> > + sg_set_buf(&sgs[1], vi->ctrl->rss.indirection_table, sg_buf_size);
> > +
> > + sg_buf_size = sizeof(vi->ctrl->rss.key_info);
> > + sg_set_buf(&sgs[2], &vi->ctrl->rss.key_info, sg_buf_size);
> > +
> > + sg_buf_size = vi->rss_key_size;
> > + sg_set_buf(&sgs[3], vi->ctrl->rss.key, sg_buf_size);
> > +
> > + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
> > + vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG
> > + : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) {
> > + dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n");
> > + return false;
> > + }
> > + return true;
> > +}
> > +
> > +static void virtnet_init_default_rss(struct virtnet_info *vi)
> > +{
> > + u32 indir_val = 0;
> > + int i = 0;
> > +
> > + vi->ctrl->rss.table_info.hash_types = vi->rss_hash_types_supported;
>
> Similar to above, and related to the next patch: is this very detailed
> specification of supported hash types needed? When is this useful? It
> is not customary to specify RSS to that degree.
In theory, there are real devices that implement virtio_net with (or
without) some supported hashes.
>
> > + vi->rss_hash_types_saved = vi->rss_hash_types_supported;
> > + vi->ctrl->rss.table_info.indirection_table_mask = vi->rss_indir_table_size - 1;
> > + vi->ctrl->rss.table_info.unclassified_queue = 0;
> > +
> > + for (; i < vi->rss_indir_table_size; ++i) {
> > + indir_val = ethtool_rxfh_indir_default(i, vi->max_queue_pairs);
> > + vi->ctrl->rss.indirection_table[i] = indir_val;
> > + }
> > +
> > + vi->ctrl->rss.key_info.max_tx_vq = vi->curr_queue_pairs;
> > + vi->ctrl->rss.key_info.hash_key_length = vi->rss_key_size;
> > +
> > + netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size);
> > +}
> > +
> >
> > static void virtnet_get_drvinfo(struct net_device *dev,
> > struct ethtool_drvinfo *info)
> > @@ -2395,6 +2500,71 @@ static void virtnet_update_settings(struct virtnet_info *vi)
> > vi->duplex = duplex;
> > }
> >
> > +static u32 virtnet_get_rxfh_key_size(struct net_device *dev)
> > +{
> > + return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size;
> > +}
> > +
> > +static u32 virtnet_get_rxfh_indir_size(struct net_device *dev)
> > +{
> > + return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size;
> > +}
> > +
> > +static int virtnet_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc)
> > +{
> > + struct virtnet_info *vi = netdev_priv(dev);
> > + int i;
> > +
> > + if (indir) {
> > + for (i = 0; i < vi->rss_indir_table_size; ++i)
> > + indir[i] = vi->ctrl->rss.indirection_table[i];
> > + }
> > +
> > + if (key)
> > + memcpy(key, vi->ctrl->rss.key, vi->rss_key_size);
> > +
> > + if (hfunc)
> > + *hfunc = ETH_RSS_HASH_TOP;
> > +
> > + return 0;
> > +}
> > +
> > +static int virtnet_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc)
> > +{
> > + struct virtnet_info *vi = netdev_priv(dev);
> > + int i;
> > +
> > + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
> > + return -EOPNOTSUPP;
> > +
> > + if (indir) {
> > + for (i = 0; i < vi->rss_indir_table_size; ++i)
> > + vi->ctrl->rss.indirection_table[i] = indir[i];
> > + }
> > + if (key)
> > + memcpy(vi->ctrl->rss.key, key, vi->rss_key_size);
> > +
> > + virtnet_commit_rss_command(vi);
> > +
> > + return 0;
> > +}
> > +
> > +static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
> > +{
> > + struct virtnet_info *vi = netdev_priv(dev);
> > + int rc = 0;
> > +
> > + switch (info->cmd) {
> > + case ETHTOOL_GRXRINGS:
> > + info->data = vi->curr_queue_pairs;
> > + break;
> > + default:
> > + rc = -EOPNOTSUPP;
> > + }
> > +
> > + return rc;
> > +}
> > +
> > static const struct ethtool_ops virtnet_ethtool_ops = {
> > .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
> > .get_drvinfo = virtnet_get_drvinfo,
> > @@ -2410,6 +2580,11 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
> > .set_link_ksettings = virtnet_set_link_ksettings,
> > .set_coalesce = virtnet_set_coalesce,
> > .get_coalesce = virtnet_get_coalesce,
> > + .get_rxfh_key_size = virtnet_get_rxfh_key_size,
> > + .get_rxfh_indir_size = virtnet_get_rxfh_indir_size,
> > + .get_rxfh = virtnet_get_rxfh,
> > + .set_rxfh = virtnet_set_rxfh,
> > + .get_rxnfc = virtnet_get_rxnfc,
> > };
> >
> > static void virtnet_freeze_down(struct virtio_device *vdev)
> > @@ -3040,7 +3215,10 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
> > "VIRTIO_NET_F_CTRL_VQ") ||
> > VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
> > VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
> > - "VIRTIO_NET_F_CTRL_VQ"))) {
> > + "VIRTIO_NET_F_CTRL_VQ") ||
> > + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, "VIRTIO_NET_F_RSS") ||
> > + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
> > + "VIRTIO_NET_F_HASH_REPORT"))) {
> > return false;
> > }
> >
> > @@ -3080,13 +3258,14 @@ static int virtnet_probe(struct virtio_device *vdev)
> > u16 max_queue_pairs;
> > int mtu;
> >
> > - /* Find if host supports multiqueue virtio_net device */
> > - err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
> > - struct virtio_net_config,
> > - max_virtqueue_pairs, &max_queue_pairs);
> > + /* Find if host supports multiqueue/rss virtio_net device */
> > + max_queue_pairs = 0;
> > + if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
>
> Is VIRTIO_NET_F_RSS implied by VIRTIO_NET_F_MQ?
MQ and/or RSS sets multiqueue, like so:
> virtio_net_set_multiqueue(n,
> virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
> virtio_has_feature(features, VIRTIO_NET_F_MQ));
So, technically it's possible to create a virtual net device with only RSS and without MQ.
>
> > + max_queue_pairs =
> > + virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs));
> >
> > /* We need at least 2 queue's */
> > - if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
> > + if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
> > max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
> > !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
> > max_queue_pairs = 1;
> > @@ -3170,8 +3349,36 @@ static int virtnet_probe(struct virtio_device *vdev)
> > if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
> > vi->mergeable_rx_bufs = true;
> >
> > - if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
> > - virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
> > + if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
> > + vi->has_rss_hash_report = true;
> > + vi->rss_indir_table_size = 1;
> > + vi->rss_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
> > + }
> > +
> > + if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
> > + vi->has_rss = true;
> > + vi->rss_indir_table_size =
> > + virtio_cread16(vdev, offsetof(struct virtio_net_config,
> > + rss_max_indirection_table_length));
> > + vi->rss_key_size =
> > + virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
> > + }
>
> Please split adding the two features, hash report and rss, into two
> separate patches.
It may introduce dead code that will be replaced by code in the 'hash
report' patch.