2019-08-14 19:12:26

by Haiyang Zhang

[permalink] [raw]
Subject: [PATCH net-next, 0/6] Add software backchannel and mlx5e HV VHCA stats

This patch set adds paravirtual backchannel in software in pci_hyperv,
which is required by the mlx5e driver HV VHCA stats agent.

The stats agent is responsible on running a periodic rx/tx packets/bytes
stats update.

Dexuan Cui (1):
PCI: hv: Add a paravirtual backchannel in software

Eran Ben Elisha (4):
net/mlx5: Add wrappers for HyperV PCIe operations
net/mlx5: Add HV VHCA infrastructure
net/mlx5: Add HV VHCA control agent
net/mlx5e: Add mlx5e HV VHCA stats agent

Haiyang Zhang (1):
PCI: hv: Add a Hyper-V PCI mini driver for software backchannel
interface

MAINTAINERS | 1 +
drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +
drivers/net/ethernet/mellanox/mlx5/core/en.h | 13 +
.../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c | 162 +++++++++
.../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h | 25 ++
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 ++++
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 365 +++++++++++++++++++++
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 104 ++++++
drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +
drivers/pci/Kconfig | 1 +
drivers/pci/controller/Kconfig | 7 +
drivers/pci/controller/Makefile | 1 +
drivers/pci/controller/pci-hyperv-mini.c | 70 ++++
drivers/pci/controller/pci-hyperv.c | 308 +++++++++++++++++
include/linux/hyperv.h | 29 ++
include/linux/mlx5/driver.h | 2 +
18 files changed, 1186 insertions(+)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
create mode 100644 drivers/pci/controller/pci-hyperv-mini.c

--
1.8.3.1


2019-08-14 19:12:30

by Haiyang Zhang

[permalink] [raw]
Subject: [PATCH net-next, 3/6] net/mlx5: Add wrappers for HyperV PCIe operations

From: Eran Ben Elisha <[email protected]>

Add wrapper functions for HyperV PCIe read / write /
block_invalidate_register operations. This will be used as an
infrastructure in the downstream patch for software communication.

This will be enabled by default if CONFIG_PCI_HYPERV_MINI is set.

Signed-off-by: Eran Ben Elisha <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
---
drivers/net/ethernet/mellanox/mlx5/core/Makefile | 1 +
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 ++++++++++++++++++++++++
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++++++++
3 files changed, 87 insertions(+)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8b7edaa..a8950b1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -45,6 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
+mlx5_core-$(CONFIG_PCI_HYPERV_MINI) += lib/hv.o

#
# Ipoib netdev
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
new file mode 100644
index 0000000..cf08d02
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+
+static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset, bool read)
+{
+ int rc = -EOPNOTSUPP;
+ int bytes_returned;
+ int block_id;
+
+ if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len % HV_CONFIG_BLOCK_SIZE_MAX)
+ return -EINVAL;
+
+ block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
+
+ rc = read ?
+ hyperv_read_cfg_blk(dev->pdev, buf,
+ HV_CONFIG_BLOCK_SIZE_MAX, block_id,
+ &bytes_returned) :
+ hyperv_write_cfg_blk(dev->pdev, buf,
+ HV_CONFIG_BLOCK_SIZE_MAX, block_id);
+
+ /* Make sure len bytes were read successfully */
+ if (read)
+ rc |= !(len == bytes_returned);
+
+ if (rc) {
+ mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
+ read ? "read" : "write", rc, len,
+ offset);
+ return rc;
+ }
+
+ return 0;
+}
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset)
+{
+ return mlx5_hv_config_common(dev, buf, len, offset, true);
+}
+
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset)
+{
+ return mlx5_hv_config_common(dev, buf, len, offset, false);
+}
+
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask))
+{
+ return hyperv_reg_block_invalidate(dev->pdev, context,
+ block_invalidate);
+}
+
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
+{
+ hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
new file mode 100644
index 0000000..7f69771
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_H__
+#define __LIB_HV_H__
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_MINI)
+
+#include <linux/hyperv.h>
+#include <linux/mlx5/driver.h>
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset);
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset);
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
+#endif
+
+#endif /* __LIB_HV_H__ */
--
1.8.3.1

2019-08-14 19:12:35

by Haiyang Zhang

[permalink] [raw]
Subject: [PATCH net-next, 6/6] net/mlx5e: Add mlx5e HV VHCA stats agent

From: Eran Ben Elisha <[email protected]>

HV VHCA stats agent is responsible on running a preiodic rx/tx
packets/bytes stats update. Currently the supported format is version
MLX5_HV_VHCA_STATS_VERSION. Block ID 1 is dedicated for statistics data
transfer from the VF to the PF.

The reporter fetch the statistics data from all opened channels, fill it
in a buffer and send it to mlx5_hv_vhca_write_agent.

As the stats layer should include some metadata per block (sequence and
offset), the HV VHCA layer shall modify the buffer before actually send it
over block 1.

Signed-off-by: Eran Ben Elisha <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
---
drivers/net/ethernet/mellanox/mlx5/core/Makefile | 1 +
drivers/net/ethernet/mellanox/mlx5/core/en.h | 13 ++
.../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c | 162 +++++++++++++++++++++
.../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h | 25 ++++
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 1 +
6 files changed, 205 insertions(+)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index e0a1056..1e8ade9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -36,6 +36,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \
lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
en/tc_tun_geneve.o
+mlx5_core-$(CONFIG_PCI_HYPERV_MINI) += en/hv_vhca_stats.o

#
# Core extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8fc5107..fc41653 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -54,6 +54,7 @@
#include "mlx5_core.h"
#include "en_stats.h"
#include "en/fs.h"
+#include "lib/hv_vhca.h"

extern const struct net_device_ops mlx5e_netdev_ops;
struct page_pool;
@@ -777,6 +778,15 @@ struct mlx5e_modify_sq_param {
int rl_index;
};

+#if IS_ENABLED(CONFIG_PCI_HYPERV_MINI)
+struct mlx5e_hv_vhca_stats_agent {
+ struct mlx5_hv_vhca_agent *agent;
+ struct delayed_work work;
+ u16 delay;
+ void *buf;
+};
+#endif
+
struct mlx5e_xsk {
/* UMEMs are stored separately from channels, because we don't want to
* lose them when channels are recreated. The kernel also stores UMEMs,
@@ -848,6 +858,9 @@ struct mlx5e_priv {
struct devlink_health_reporter *tx_reporter;
struct devlink_health_reporter *rx_reporter;
struct mlx5e_xsk xsk;
+#if IS_ENABLED(CONFIG_PCI_HYPERV_MINI)
+ struct mlx5e_hv_vhca_stats_agent stats_agent;
+#endif
};

struct mlx5e_profile {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
new file mode 100644
index 0000000..c37b4ac
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include "en.h"
+#include "en/hv_vhca_stats.h"
+#include "lib/hv_vhca.h"
+#include "lib/hv.h"
+
+struct mlx5e_hv_vhca_per_ring_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+};
+
+static void
+mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch,
+ struct mlx5e_hv_vhca_per_ring_stats *data)
+{
+ struct mlx5e_channel_stats *stats;
+ int tc;
+
+ stats = &priv->channel_stats[ch];
+ data->rx_packets = stats->rq.packets;
+ data->rx_bytes = stats->rq.bytes;
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++) {
+ data->tx_packets += stats->sq[tc].packets;
+ data->tx_bytes += stats->sq[tc].bytes;
+ }
+}
+
+static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int buf_len)
+{
+ int ch, i = 0;
+
+ for (ch = 0; ch < priv->max_nch; ch++) {
+ u64 *buf = data + i;
+
+ if (WARN_ON_ONCE(buf +
+ sizeof(struct mlx5e_hv_vhca_per_ring_stats) >
+ data + buf_len))
+ return;
+
+ mlx5e_hv_vhca_fill_ring_stats(priv, ch,
+ (struct mlx5e_hv_vhca_per_ring_stats *)buf);
+ i += sizeof(struct mlx5e_hv_vhca_per_ring_stats) / sizeof(u64);
+ }
+}
+
+static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv)
+{
+ return (sizeof(struct mlx5e_hv_vhca_per_ring_stats) *
+ priv->max_nch);
+}
+
+static void mlx5e_hv_vhca_stats_work(struct work_struct *work)
+{
+ struct mlx5e_hv_vhca_stats_agent *sagent;
+ struct mlx5_hv_vhca_agent *agent;
+ struct delayed_work *dwork;
+ struct mlx5e_priv *priv;
+ int buf_len, rc;
+ void *buf;
+
+ dwork = to_delayed_work(work);
+ sagent = container_of(dwork, struct mlx5e_hv_vhca_stats_agent, work);
+ priv = container_of(sagent, struct mlx5e_priv, stats_agent);
+ buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+ agent = sagent->agent;
+ buf = sagent->buf;
+
+ memset(buf, 0, buf_len);
+ mlx5e_hv_vhca_fill_stats(priv, buf, buf_len);
+
+ rc = mlx5_hv_vhca_agent_write(agent, buf, buf_len);
+ if (rc) {
+ mlx5_core_err(priv->mdev,
+ "%s: Failed to write stats, err = %d\n",
+ __func__, rc);
+ return;
+ }
+
+ if (sagent->delay)
+ queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
+
+enum {
+ MLX5_HV_VHCA_STATS_VERSION = 1,
+ MLX5_HV_VHCA_STATS_UPDATE_ONCE = 0xFFFF,
+};
+
+static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent,
+ struct mlx5_hv_vhca_control_block *block)
+{
+ struct mlx5e_hv_vhca_stats_agent *sagent;
+ struct mlx5e_priv *priv;
+
+ priv = mlx5_hv_vhca_agent_priv(agent);
+ sagent = &priv->stats_agent;
+
+ block->version = MLX5_HV_VHCA_STATS_VERSION;
+ block->rings = priv->max_nch;
+
+ if (!block->command) {
+ cancel_delayed_work_sync(&priv->stats_agent.work);
+ return;
+ }
+
+ sagent->delay = block->command == MLX5_HV_VHCA_STATS_UPDATE_ONCE ? 0 :
+ msecs_to_jiffies(block->command * 100);
+
+ queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
+
+static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
+{
+ struct mlx5e_priv *priv = mlx5_hv_vhca_agent_priv(agent);
+
+ cancel_delayed_work_sync(&priv->stats_agent.work);
+}
+
+int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+{
+ int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+ struct mlx5_hv_vhca_agent *agent;
+
+ priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
+ if (!priv->stats_agent.buf)
+ return -ENOMEM;
+
+ agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
+ MLX5_HV_VHCA_AGENT_STATS,
+ mlx5e_hv_vhca_stats_control, NULL,
+ mlx5e_hv_vhca_stats_cleanup,
+ priv);
+
+ if (IS_ERR_OR_NULL(agent)) {
+ if (IS_ERR(agent))
+ netdev_warn(priv->netdev,
+ "Failed to create hv vhca stats agent, err = %ld\n",
+ PTR_ERR(agent));
+
+ kfree(priv->stats_agent.buf);
+ return IS_ERR_OR_NULL(agent);
+ }
+
+ priv->stats_agent.agent = agent;
+ INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
+
+ return 0;
+}
+
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
+{
+ if (IS_ERR_OR_NULL(priv->stats_agent.agent))
+ return;
+
+ mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent);
+ kfree(priv->stats_agent.buf);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
new file mode 100644
index 0000000..cd40600
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_STATS_VHCA_H__
+#define __MLX5_EN_STATS_VHCA_H__
+#include "en.h"
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_MINI)
+
+int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
+
+#else
+
+static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
+static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
+{
+}
+#endif
+
+#endif /* __MLX5_EN_STATS_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5721d3d..fac8455 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -64,6 +64,7 @@
#include "en/xsk/setup.h"
#include "en/xsk/rx.h"
#include "en/xsk/tx.h"
+#include "en/hv_vhca_stats.h"


bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
@@ -5103,6 +5104,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
if (mlx5e_monitor_counter_supported(priv))
mlx5e_monitor_counter_init(priv);

+ mlx5e_hv_vhca_stats_create(priv);
if (netdev->reg_state != NETREG_REGISTERED)
return;
#ifdef CONFIG_MLX5_CORE_EN_DCB
@@ -5135,6 +5137,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)

queue_work(priv->wq, &priv->set_rx_mode_work);

+ mlx5e_hv_vhca_stats_destroy(priv);
if (mlx5e_monitor_counter_supported(priv))
mlx5e_monitor_counter_cleanup(priv);

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
index 6f4bfb1..52ef78a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -13,6 +13,7 @@

enum mlx5_hv_vhca_agent_type {
MLX5_HV_VHCA_AGENT_CONTROL = 0,
+ MLX5_HV_VHCA_AGENT_STATS = 1,
MLX5_HV_VHCA_AGENT_MAX = 32,
};

--
1.8.3.1

2019-08-14 19:12:48

by Haiyang Zhang

[permalink] [raw]
Subject: [PATCH net-next, 4/6] net/mlx5: Add HV VHCA infrastructure

From: Eran Ben Elisha <[email protected]>

HV VHCA is a layer which provides PF to VF communication channel based on
HyperV PCI config channel. It implements Mellanox's Inter VHCA control
communication protocol. The protocol contains control block in order to
pass messages between the PF and VF drivers, and data blocks in order to
pass actual data.

The infrastructure is agent based. Each agent will be responsible of
contiguous buffer blocks in the VHCA config space. This infrastructure will
bind agents to their blocks, and those agents can only access read/write
the buffer blocks assigned to them. Each agent will provide three
callbacks (control, invalidate, cleanup). Control will be invoked when
block-0 is invalidated with a command that concerns this agent. Invalidate
callback will be invoked if one of the blocks assigned to this agent was
invalidated. Cleanup will be invoked before the agent is being freed in
order to clean all of its open resources or deferred works.

Block-0 serves as the control block. All execution commands from the PF
will be written by the PF over this block. VF will ack on those by
writing on block-0 as well. Its format is described by struct
mlx5_hv_vhca_control_block layout.

Signed-off-by: Eran Ben Elisha <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
---
drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +-
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 247 +++++++++++++++++++++
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 102 +++++++++
drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +
include/linux/mlx5/driver.h | 2 +
5 files changed, 359 insertions(+), 1 deletion(-)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index a8950b1..e0a1056 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -45,7 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
-mlx5_core-$(CONFIG_PCI_HYPERV_MINI) += lib/hv.o
+mlx5_core-$(CONFIG_PCI_HYPERV_MINI)+= lib/hv.o lib/hv_vhca.o

#
# Ipoib netdev
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
new file mode 100644
index 0000000..b2eebdf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+#include "lib/hv_vhca.h"
+
+struct mlx5_hv_vhca {
+ struct mlx5_core_dev *dev;
+ struct workqueue_struct *work_queue;
+ struct mlx5_hv_vhca_agent *agents[MLX5_HV_VHCA_AGENT_MAX];
+ struct mutex agents_lock; /* Protect agents array */
+};
+
+struct mlx5_hv_vhca_work {
+ struct work_struct invalidate_work;
+ struct mlx5_hv_vhca *hv_vhca;
+ u64 block_mask;
+};
+
+struct mlx5_hv_vhca_data_block {
+ u16 sequence;
+ u16 offset;
+ u8 reserved[4];
+ u64 data[15];
+};
+
+struct mlx5_hv_vhca_agent {
+ enum mlx5_hv_vhca_agent_type type;
+ struct mlx5_hv_vhca *hv_vhca;
+ void *priv;
+ int seq;
+ void (*control)(struct mlx5_hv_vhca_agent *agent,
+ struct mlx5_hv_vhca_control_block *block);
+ void (*invalidate)(struct mlx5_hv_vhca_agent *agent,
+ u64 block_mask);
+ void (*cleanup)(struct mlx5_hv_vhca_agent *agent);
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_hv_vhca *hv_vhca = NULL;
+
+ hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
+ if (!hv_vhca)
+ return ERR_PTR(-ENOMEM);
+
+ hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca");
+ if (!hv_vhca->work_queue) {
+ kfree(hv_vhca);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ hv_vhca->dev = dev;
+ mutex_init(&hv_vhca->agents_lock);
+
+ return hv_vhca;
+}
+
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return;
+
+ flush_workqueue(hv_vhca->work_queue);
+ destroy_workqueue(hv_vhca->work_queue);
+ kfree(hv_vhca);
+}
+
+static void mlx5_hv_vhca_invalidate_work(struct work_struct *work)
+{
+ struct mlx5_hv_vhca_work *hwork;
+ struct mlx5_hv_vhca *hv_vhca;
+ int i;
+
+ hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work);
+ hv_vhca = hwork->hv_vhca;
+
+ mutex_lock(&hv_vhca->agents_lock);
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+ struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+ if (!agent || !agent->invalidate)
+ continue;
+
+ if (!(BIT(agent->type) & hwork->block_mask))
+ continue;
+
+ agent->invalidate(agent, hwork->block_mask);
+ }
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ kfree(hwork);
+}
+
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
+{
+ struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context;
+ struct mlx5_hv_vhca_work *work;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work);
+ work->hv_vhca = hv_vhca;
+ work->block_mask = block_mask;
+
+ queue_work(hv_vhca->work_queue, &work->invalidate_work);
+}
+
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return IS_ERR_OR_NULL(hv_vhca);
+
+ return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
+ mlx5_hv_vhca_invalidate);
+}
+
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+ int i;
+
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return;
+
+ mutex_lock(&hv_vhca->agents_lock);
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
+ WARN_ON(hv_vhca->agents[i]);
+
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ mlx5_hv_unregister_invalidate(hv_vhca->dev);
+}
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+ enum mlx5_hv_vhca_agent_type type,
+ void (*control)(struct mlx5_hv_vhca_agent*,
+ struct mlx5_hv_vhca_control_block *block),
+ void (*invalidate)(struct mlx5_hv_vhca_agent*,
+ u64 block_mask),
+ void (*cleaup)(struct mlx5_hv_vhca_agent *agent),
+ void *priv)
+{
+ struct mlx5_hv_vhca_agent *agent;
+
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return ERR_PTR(-ENOMEM);
+
+ if (hv_vhca->agents[type])
+ return ERR_PTR(-EINVAL);
+
+ agent = kzalloc(sizeof(*agent), GFP_KERNEL);
+ if (!agent)
+ return ERR_PTR(-ENOMEM);
+
+ agent->type = type;
+ agent->hv_vhca = hv_vhca;
+ agent->priv = priv;
+ agent->control = control;
+ agent->invalidate = invalidate;
+ agent->cleanup = cleaup;
+
+ mutex_lock(&hv_vhca->agents_lock);
+ hv_vhca->agents[type] = agent;
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ return agent;
+}
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+ struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+
+ mutex_lock(&hv_vhca->agents_lock);
+
+ if (WARN_ON(agent != hv_vhca->agents[agent->type])) {
+ mutex_unlock(&hv_vhca->agents_lock);
+ return;
+ }
+
+ hv_vhca->agents[agent->type] = NULL;
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ if (agent->cleanup)
+ agent->cleanup(agent);
+
+ kfree(agent);
+}
+
+static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
+ struct mlx5_hv_vhca_data_block *data_block,
+ void *src, int len, int *offset)
+{
+ int bytes = min_t(int, (int)sizeof(data_block->data), len);
+
+ data_block->sequence = agent->seq;
+ data_block->offset = (*offset)++;
+ memcpy(data_block->data, src, bytes);
+
+ return bytes;
+}
+
+static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent)
+{
+ agent->seq++;
+}
+
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+ void *buf, int len)
+{
+ int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX;
+ int block_offset = 0;
+ int total = 0;
+ int err;
+
+ while (len) {
+ struct mlx5_hv_vhca_data_block data_block = {0};
+ int bytes;
+
+ bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block,
+ buf + total,
+ len, &block_offset);
+ if (!bytes)
+ return -ENOMEM;
+
+ err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block,
+ sizeof(data_block), offset);
+ if (err)
+ return err;
+
+ total += bytes;
+ len -= bytes;
+ }
+
+ mlx5_hv_vhca_agent_seq_update(agent);
+
+ return 0;
+}
+
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent)
+{
+ return agent->priv;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
new file mode 100644
index 0000000..fa7ee85
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_VHCA_H__
+#define __LIB_HV_VHCA_H__
+
+#include "en.h"
+#include "lib/hv.h"
+
+struct mlx5_hv_vhca_agent;
+struct mlx5_hv_vhca;
+struct mlx5_hv_vhca_control_block;
+
+enum mlx5_hv_vhca_agent_type {
+ MLX5_HV_VHCA_AGENT_MAX = 32,
+};
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_MINI)
+
+struct mlx5_hv_vhca_control_block {
+ u32 capabilities;
+ u32 control;
+ u16 command;
+ u16 command_ack;
+ u16 version;
+ u16 rings;
+ u32 reserved1[28];
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev);
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca);
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask);
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+ enum mlx5_hv_vhca_agent_type type,
+ void (*control)(struct mlx5_hv_vhca_agent*,
+ struct mlx5_hv_vhca_control_block *block),
+ void (*invalidate)(struct mlx5_hv_vhca_agent*,
+ u64 block_mask),
+ void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+ void *context);
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+ void *buf, int len);
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent);
+
+#else
+
+static inline struct mlx5_hv_vhca *
+mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+ return NULL;
+}
+
+static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+ return 0;
+}
+
+static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline void mlx5_hv_vhca_invalidate(void *context,
+ u64 block_mask)
+{
+}
+
+static inline struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+ enum mlx5_hv_vhca_agent_type type,
+ void (*control)(struct mlx5_hv_vhca_agent*,
+ struct mlx5_hv_vhca_control_block *block),
+ void (*invalidate)(struct mlx5_hv_vhca_agent*,
+ u64 block_mask),
+ void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+ void *context)
+{
+ return NULL;
+}
+
+static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+}
+
+static inline int
+mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent,
+ void *buf, int len)
+{
+ return 0;
+}
+#endif
+
+#endif /* __LIB_HV_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 4cc90eb..50ee38b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -69,6 +69,7 @@
#include "lib/pci_vsc.h"
#include "diag/fw_tracer.h"
#include "ecpf.h"
+#include "lib/hv_vhca.h"

MODULE_AUTHOR("Eli Cohen <[email protected]>");
MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
@@ -872,6 +873,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
}

dev->tracer = mlx5_fw_tracer_create(dev);
+ dev->hv_vhca = mlx5_hv_vhca_create(dev);

return 0;

@@ -902,6 +904,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)

static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
+ mlx5_hv_vhca_destroy(dev->hv_vhca);
mlx5_fw_tracer_destroy(dev->tracer);
mlx5_fpga_cleanup(dev);
mlx5_eswitch_cleanup(dev->priv.eswitch);
@@ -1068,6 +1071,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_fw_tracer;
}

+ mlx5_hv_vhca_init(dev->hv_vhca);
+
err = mlx5_fpga_device_start(dev);
if (err) {
mlx5_core_err(dev, "fpga device start failed %d\n", err);
@@ -1123,6 +1128,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
err_ipsec_start:
mlx5_fpga_device_stop(dev);
err_fpga_start:
+ mlx5_hv_vhca_cleanup(dev->hv_vhca);
mlx5_fw_tracer_cleanup(dev->tracer);
err_fw_tracer:
mlx5_eq_table_destroy(dev);
@@ -1143,6 +1149,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_accel_ipsec_cleanup(dev);
mlx5_accel_tls_cleanup(dev);
mlx5_fpga_device_stop(dev);
+ mlx5_hv_vhca_cleanup(dev->hv_vhca);
mlx5_fw_tracer_cleanup(dev->tracer);
mlx5_eq_table_destroy(dev);
mlx5_irq_table_destroy(dev);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 2b84ee9..97bb98c 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -646,6 +646,7 @@ struct mlx5_clock {
struct mlx5_fw_tracer;
struct mlx5_vxlan;
struct mlx5_geneve;
+struct mlx5_hv_vhca;

struct mlx5_core_dev {
struct device *device;
@@ -693,6 +694,7 @@ struct mlx5_core_dev {
struct mlx5_ib_clock_info *clock_info;
struct mlx5_fw_tracer *tracer;
u32 vsc_addr;
+ struct mlx5_hv_vhca *hv_vhca;
};

struct mlx5_db {
--
1.8.3.1

2019-08-14 19:12:51

by Haiyang Zhang

[permalink] [raw]
Subject: [PATCH net-next, 1/6] PCI: hv: Add a paravirtual backchannel in software

From: Dexuan Cui <[email protected]>

Windows SR-IOV provides a backchannel mechanism in software for communication
between a VF driver and a PF driver. These "configuration blocks" are
similar in concept to PCI configuration space, but instead of doing reads and
writes in 32-bit chunks through a very slow path, packets of up to 128 bytes
can be sent or received asynchronously.

Nearly every SR-IOV device contains just such a communications channel in
hardware, so using this one in software is usually optional. Using the
software channel, however, allows driver implementers to leverage software
tools that fuzz the communications channel looking for vulnerabilities.

The usage model for these packets puts the responsibility for reading or
writing on the VF driver. The VF driver sends a read or a write packet,
indicating which "block" is being referred to by number.

If the PF driver wishes to initiate communication, it can "invalidate" one or
more of the first 64 blocks. This invalidation is delivered via a callback
supplied by the VF driver by this driver.

No protocol is implied, except that supplied by the PF and VF drivers.

Signed-off-by: Jake Oshins <[email protected]>
Signed-off-by: Dexuan Cui <[email protected]>
Cc: Haiyang Zhang <[email protected]>
Cc: K. Y. Srinivasan <[email protected]>
Cc: Stephen Hemminger <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
Signed-off-by: Haiyang Zhang <[email protected]>
---
drivers/pci/controller/pci-hyperv.c | 302 ++++++++++++++++++++++++++++++++++++
include/linux/hyperv.h | 15 ++
2 files changed, 317 insertions(+)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 40b6254..57adeca 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -365,6 +365,39 @@ struct pci_delete_interrupt {
struct tran_int_desc int_desc;
} __packed;

+/*
+ * Note: the VM must pass a valid block id, wslot and bytes_requested.
+ */
+struct pci_read_block {
+ struct pci_message message_type;
+ u32 block_id;
+ union win_slot_encoding wslot;
+ u32 bytes_requested;
+} __packed;
+
+struct pci_read_block_response {
+ struct vmpacket_descriptor hdr;
+ u32 status;
+ u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
+} __packed;
+
+/*
+ * Note: the VM must pass a valid block id, wslot and byte_count.
+ */
+struct pci_write_block {
+ struct pci_message message_type;
+ u32 block_id;
+ union win_slot_encoding wslot;
+ u32 byte_count;
+ u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
+} __packed;
+
+struct pci_dev_inval_block {
+ struct pci_incoming_message incoming;
+ union win_slot_encoding wslot;
+ u64 block_mask;
+} __packed;
+
struct pci_dev_incoming {
struct pci_incoming_message incoming;
union win_slot_encoding wslot;
@@ -499,6 +532,9 @@ struct hv_pci_dev {
struct hv_pcibus_device *hbus;
struct work_struct wrk;

+ void (*block_invalidate)(void *context, u64 block_mask);
+ void *invalidate_context;
+
/*
* What would be observed if one wrote 0xFFFFFFFF to a BAR and then
* read it back, for each of the BAR offsets within config space.
@@ -817,6 +853,256 @@ static int hv_pcifront_write_config(struct pci_bus *bus, unsigned int devfn,
.write = hv_pcifront_write_config,
};

+/*
+ * Paravirtual backchannel
+ *
+ * Hyper-V SR-IOV provides a backchannel mechanism in software for
+ * communication between a VF driver and a PF driver. These
+ * "configuration blocks" are similar in concept to PCI configuration space,
+ * but instead of doing reads and writes in 32-bit chunks through a very slow
+ * path, packets of up to 128 bytes can be sent or received asynchronously.
+ *
+ * Nearly every SR-IOV device contains just such a communications channel in
+ * hardware, so using this one in software is usually optional. Using the
+ * software channel, however, allows driver implementers to leverage software
+ * tools that fuzz the communications channel looking for vulnerabilities.
+ *
+ * The usage model for these packets puts the responsibility for reading or
+ * writing on the VF driver. The VF driver sends a read or a write packet,
+ * indicating which "block" is being referred to by number.
+ *
+ * If the PF driver wishes to initiate communication, it can "invalidate" one or
+ * more of the first 64 blocks. This invalidation is delivered via a callback
+ * supplied by the VF driver by this driver.
+ *
+ * No protocol is implied, except that supplied by the PF and VF drivers.
+ */
+
+struct hv_read_config_compl {
+ struct hv_pci_compl comp_pkt;
+ void *buf;
+ unsigned int len;
+ unsigned int bytes_returned;
+};
+
+/**
+ * hv_pci_read_config_compl() - Invoked when a response packet
+ * for a read config block operation arrives.
+ * @context: Identifies the read config operation
+ * @resp: The response packet itself
+ * @resp_packet_size: Size in bytes of the response packet
+ */
+static void hv_pci_read_config_compl(void *context, struct pci_response *resp,
+ int resp_packet_size)
+{
+ struct hv_read_config_compl *comp = context;
+ struct pci_read_block_response *read_resp =
+ (struct pci_read_block_response *)resp;
+ unsigned int data_len, hdr_len;
+
+ hdr_len = offsetof(struct pci_read_block_response, bytes);
+ if (resp_packet_size < hdr_len) {
+ comp->comp_pkt.completion_status = -1;
+ goto out;
+ }
+
+ data_len = resp_packet_size - hdr_len;
+ if (data_len > 0 && read_resp->status == 0) {
+ comp->bytes_returned = min(comp->len, data_len);
+ memcpy(comp->buf, read_resp->bytes, comp->bytes_returned);
+ } else {
+ comp->bytes_returned = 0;
+ }
+
+ comp->comp_pkt.completion_status = read_resp->status;
+out:
+ complete(&comp->comp_pkt.host_event);
+}
+
+/**
+ * hv_read_config_block() - Sends a read config block request to
+ * the back-end driver running in the Hyper-V parent partition.
+ * @pdev: The PCI driver's representation for this device.
+ * @buf: Buffer into which the config block will be copied.
+ * @len: Size in bytes of buf.
+ * @block_id: Identifies the config block which has been requested.
+ * @bytes_returned: Size which came back from the back-end driver.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
+ unsigned int block_id, unsigned int *bytes_returned)
+{
+ struct hv_pcibus_device *hbus =
+ container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+ sysdata);
+ struct {
+ struct pci_packet pkt;
+ char buf[sizeof(struct pci_read_block)];
+ } pkt;
+ struct hv_read_config_compl comp_pkt;
+ struct pci_read_block *read_blk;
+ int ret;
+
+ if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
+ return -EINVAL;
+
+ init_completion(&comp_pkt.comp_pkt.host_event);
+ comp_pkt.buf = buf;
+ comp_pkt.len = len;
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.pkt.completion_func = hv_pci_read_config_compl;
+ pkt.pkt.compl_ctxt = &comp_pkt;
+ read_blk = (struct pci_read_block *)&pkt.pkt.message;
+ read_blk->message_type.type = PCI_READ_BLOCK;
+ read_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
+ read_blk->block_id = block_id;
+ read_blk->bytes_requested = len;
+
+ ret = vmbus_sendpacket(hbus->hdev->channel, read_blk,
+ sizeof(*read_blk), (unsigned long)&pkt.pkt,
+ VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret)
+ return ret;
+
+ ret = wait_for_response(hbus->hdev, &comp_pkt.comp_pkt.host_event);
+ if (ret)
+ return ret;
+
+ if (comp_pkt.comp_pkt.completion_status != 0 ||
+ comp_pkt.bytes_returned == 0) {
+ dev_err(&hbus->hdev->device,
+ "Read Config Block failed: 0x%x, bytes_returned=%d\n",
+ comp_pkt.comp_pkt.completion_status,
+ comp_pkt.bytes_returned);
+ return -EIO;
+ }
+
+ *bytes_returned = comp_pkt.bytes_returned;
+ return 0;
+}
+EXPORT_SYMBOL(hv_read_config_block);
+
+/**
+ * hv_pci_write_config_compl() - Invoked when a response packet for a write
+ * config block operation arrives.
+ * @context: Identifies the write config operation
+ * @resp: The response packet itself
+ * @resp_packet_size: Size in bytes of the response packet
+ */
+static void hv_pci_write_config_compl(void *context, struct pci_response *resp,
+ int resp_packet_size)
+{
+ struct hv_pci_compl *comp_pkt = context;
+
+ comp_pkt->completion_status = resp->status;
+ complete(&comp_pkt->host_event);
+}
+
+/**
+ * hv_write_config_block() - Sends a write config block request to the
+ * back-end driver running in the Hyper-V parent partition.
+ * @pdev: The PCI driver's representation for this device.
+ * @buf: Buffer from which the config block will be copied.
+ * @len: Size in bytes of buf.
+ * @block_id: Identifies the config block which is being written.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
+ unsigned int block_id)
+{
+ struct hv_pcibus_device *hbus =
+ container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+ sysdata);
+ struct {
+ struct pci_packet pkt;
+ char buf[sizeof(struct pci_write_block)];
+ u32 reserved;
+ } pkt;
+ struct hv_pci_compl comp_pkt;
+ struct pci_write_block *write_blk;
+ u32 pkt_size;
+ int ret;
+
+ if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
+ return -EINVAL;
+
+ init_completion(&comp_pkt.host_event);
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.pkt.completion_func = hv_pci_write_config_compl;
+ pkt.pkt.compl_ctxt = &comp_pkt;
+ write_blk = (struct pci_write_block *)&pkt.pkt.message;
+ write_blk->message_type.type = PCI_WRITE_BLOCK;
+ write_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
+ write_blk->block_id = block_id;
+ write_blk->byte_count = len;
+ memcpy(write_blk->bytes, buf, len);
+ pkt_size = offsetof(struct pci_write_block, bytes) + len;
+ /*
+ * This quirk is required on some hosts shipped around 2018, because
+ * these hosts don't check the pkt_size correctly (new hosts have been
+ * fixed since early 2019). The quirk is also safe on very old hosts
+ * and new hosts, because, on them, what really matters is the length
+ * specified in write_blk->byte_count.
+ */
+ pkt_size += sizeof(pkt.reserved);
+
+ ret = vmbus_sendpacket(hbus->hdev->channel, write_blk, pkt_size,
+ (unsigned long)&pkt.pkt, VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret)
+ return ret;
+
+ ret = wait_for_response(hbus->hdev, &comp_pkt.host_event);
+ if (ret)
+ return ret;
+
+ if (comp_pkt.completion_status != 0) {
+ dev_err(&hbus->hdev->device,
+ "Write Config Block failed: 0x%x\n",
+ comp_pkt.completion_status);
+ return -EIO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(hv_write_config_block);
+
+/**
+ * hv_register_block_invalidate() - Invoked when a config block invalidation
+ * arrives from the back-end driver.
+ * @pdev: The PCI driver's representation for this device.
+ * @context: Identifies the device.
+ * @block_invalidate: Identifies all of the blocks being invalidated.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask))
+{
+ struct hv_pcibus_device *hbus =
+ container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+ sysdata);
+ struct hv_pci_dev *hpdev;
+
+ hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
+ if (!hpdev)
+ return -ENODEV;
+
+ hpdev->block_invalidate = block_invalidate;
+ hpdev->invalidate_context = context;
+
+ put_pcichild(hpdev);
+ return 0;
+
+}
+EXPORT_SYMBOL(hv_register_block_invalidate);
+
/* Interrupt management hooks */
static void hv_int_desc_free(struct hv_pci_dev *hpdev,
struct tran_int_desc *int_desc)
@@ -1968,6 +2254,7 @@ static void hv_pci_onchannelcallback(void *context)
struct pci_response *response;
struct pci_incoming_message *new_message;
struct pci_bus_relations *bus_rel;
+ struct pci_dev_inval_block *inval;
struct pci_dev_incoming *dev_message;
struct hv_pci_dev *hpdev;

@@ -2045,6 +2332,21 @@ static void hv_pci_onchannelcallback(void *context)
}
break;

+ case PCI_INVALIDATE_BLOCK:
+
+ inval = (struct pci_dev_inval_block *)buffer;
+ hpdev = get_pcichild_wslot(hbus,
+ inval->wslot.slot);
+ if (hpdev) {
+ if (hpdev->block_invalidate) {
+ hpdev->block_invalidate(
+ hpdev->invalidate_context,
+ inval->block_mask);
+ }
+ put_pcichild(hpdev);
+ }
+ break;
+
default:
dev_warn(&hbus->hdev->device,
"Unimplemented protocol message %x\n",
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6256cc3..9d37f8c 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1578,4 +1578,19 @@ struct vmpacket_descriptor *
for (pkt = hv_pkt_iter_first(channel); pkt; \
pkt = hv_pkt_iter_next(channel, pkt))

+/*
+ * Functions for passing data between SR-IOV PF and VF drivers. The VF driver
+ * sends requests to read and write blocks. Each block must be 128 bytes or
+ * smaller. Optionally, the VF driver can register a callback function which
+ * will be invoked when the host says that one or more of the first 64 block
+ * IDs is "invalid" which means that the VF driver should reread them.
+ */
+#define HV_CONFIG_BLOCK_SIZE_MAX 128
+int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
+ unsigned int block_id, unsigned int *bytes_returned);
+int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
+ unsigned int block_id);
+int hv_register_block_invalidate(struct pci_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
#endif /* _HYPERV_H */
--
1.8.3.1

2019-08-14 19:13:16

by Haiyang Zhang

[permalink] [raw]
Subject: [PATCH net-next, 5/6] net/mlx5: Add HV VHCA control agent

From: Eran Ben Elisha <[email protected]>

Control agent is responsible over of the control block (ID 0). It should
update the PF via this block about every capability change. In addition,
upon block 0 invalidate, it should activate all other supported agents
with data requests from the PF.

Upon agent create/destroy, the invalidate callback of the control agent
is being called in order to update the PF driver about this change.

The control agent is an integral part of HV VHCA and will be created
and destroy as part of the HV VHCA init/cleanup flow.

Signed-off-by: Eran Ben Elisha <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
---
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 122 ++++++++++++++++++++-
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 1 +
2 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
index b2eebdf..3c7fffa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
@@ -110,22 +110,131 @@ void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
queue_work(hv_vhca->work_queue, &work->invalidate_work);
}

+#define AGENT_MASK(type) (type ? BIT(type - 1) : 0 /* control */)
+
+static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca,
+ struct mlx5_hv_vhca_control_block *block)
+{
+ int i;
+
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+ struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+ if (!agent || !agent->control)
+ continue;
+
+ if (!(AGENT_MASK(agent->type) & block->control))
+ continue;
+
+ agent->control(agent, block);
+ }
+}
+
+static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca,
+ u32 *capabilities)
+{
+ int i;
+
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+ struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+ if (agent)
+ *capabilities |= AGENT_MASK(agent->type);
+ }
+}
+
+static void
+mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent,
+ u64 block_mask)
+{
+ struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+ struct mlx5_core_dev *dev = hv_vhca->dev;
+ struct mlx5_hv_vhca_control_block *block;
+ u32 capabilities = 0;
+ int err;
+
+ block = kzalloc(sizeof(*block), GFP_KERNEL);
+ if (!block)
+ return;
+
+ err = mlx5_hv_read_config(dev, block, sizeof(*block), 0);
+ if (err)
+ goto free_block;
+
+ mlx5_hv_vhca_capabilities(hv_vhca, &capabilities);
+
+ /* In case no capabilities, send empty block in return */
+ if (!capabilities) {
+ memset(block, 0, sizeof(*block));
+ goto write;
+ }
+
+ if (block->capabilities != capabilities)
+ block->capabilities = capabilities;
+
+ if (block->control & ~capabilities)
+ goto free_block;
+
+ mlx5_hv_vhca_agents_control(hv_vhca, block);
+ block->command_ack = block->command;
+
+write:
+ mlx5_hv_write_config(dev, block, sizeof(*block), 0);
+
+free_block:
+ kfree(block);
+}
+
+static struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca)
+{
+ return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL,
+ NULL,
+ mlx5_hv_vhca_control_agent_invalidate,
+ NULL, NULL);
+}
+
+static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+ mlx5_hv_vhca_agent_destroy(agent);
+}
+
int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
{
+ struct mlx5_hv_vhca_agent *agent;
+ int err;
+
if (IS_ERR_OR_NULL(hv_vhca))
return IS_ERR_OR_NULL(hv_vhca);

- return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
- mlx5_hv_vhca_invalidate);
+ err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
+ mlx5_hv_vhca_invalidate);
+ if (err)
+ return err;
+
+ agent = mlx5_hv_vhca_control_agent_create(hv_vhca);
+ if (IS_ERR_OR_NULL(agent)) {
+ mlx5_hv_unregister_invalidate(hv_vhca->dev);
+ return IS_ERR_OR_NULL(agent);
+ }
+
+ hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent;
+
+ return 0;
}

void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
{
+ struct mlx5_hv_vhca_agent *agent;
int i;

if (IS_ERR_OR_NULL(hv_vhca))
return;

+ agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL];
+ if (!IS_ERR_OR_NULL(agent))
+ mlx5_hv_vhca_control_agent_destroy(agent);
+
mutex_lock(&hv_vhca->agents_lock);
for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
WARN_ON(hv_vhca->agents[i]);
@@ -135,6 +244,11 @@ void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
mlx5_hv_unregister_invalidate(hv_vhca->dev);
}

+static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca)
+{
+ mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL));
+}
+
struct mlx5_hv_vhca_agent *
mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
enum mlx5_hv_vhca_agent_type type,
@@ -168,6 +282,8 @@ struct mlx5_hv_vhca_agent *
hv_vhca->agents[type] = agent;
mutex_unlock(&hv_vhca->agents_lock);

+ mlx5_hv_vhca_agents_update(hv_vhca);
+
return agent;
}

@@ -189,6 +305,8 @@ void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
agent->cleanup(agent);

kfree(agent);
+
+ mlx5_hv_vhca_agents_update(hv_vhca);
}

static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
index fa7ee85..6f4bfb1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -12,6 +12,7 @@
struct mlx5_hv_vhca_control_block;

enum mlx5_hv_vhca_agent_type {
+ MLX5_HV_VHCA_AGENT_CONTROL = 0,
MLX5_HV_VHCA_AGENT_MAX = 32,
};

--
1.8.3.1

2019-08-14 19:14:34

by Haiyang Zhang

[permalink] [raw]
Subject: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for software backchannel interface

This mini driver is a helper driver allows other drivers to
have a common interface with the Hyper-V PCI frontend driver.

Signed-off-by: Haiyang Zhang <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
---
MAINTAINERS | 1 +
drivers/pci/Kconfig | 1 +
drivers/pci/controller/Kconfig | 7 ++++
drivers/pci/controller/Makefile | 1 +
drivers/pci/controller/pci-hyperv-mini.c | 70 ++++++++++++++++++++++++++++++++
drivers/pci/controller/pci-hyperv.c | 12 ++++--
include/linux/hyperv.h | 30 ++++++++++----
7 files changed, 111 insertions(+), 11 deletions(-)
create mode 100644 drivers/pci/controller/pci-hyperv-mini.c

diff --git a/MAINTAINERS b/MAINTAINERS
index e352550..c4962b9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7453,6 +7453,7 @@ F: drivers/hid/hid-hyperv.c
F: drivers/hv/
F: drivers/input/serio/hyperv-keyboard.c
F: drivers/pci/controller/pci-hyperv.c
+F: drivers/pci/controller/pci-hyperv-mini.c
F: drivers/net/hyperv/
F: drivers/scsi/storvsc_drv.c
F: drivers/uio/uio_hv_generic.c
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 2ab9240..bb852f5 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -182,6 +182,7 @@ config PCI_LABEL
config PCI_HYPERV
tristate "Hyper-V PCI Frontend"
depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+ select PCI_HYPERV_MINI
help
The PCI device frontend driver allows the kernel to import arbitrary
PCI devices from a PCI backend to support PCI driver domains.
diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index fe9f9f1..8e31cba 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -281,5 +281,12 @@ config VMD
To compile this driver as a module, choose M here: the
module will be called vmd.

+config PCI_HYPERV_MINI
+ tristate "Hyper-V PCI Mini"
+ depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+ help
+ The Hyper-V PCI Mini is a helper driver allows other drivers to
+ have a common interface with the Hyper-V PCI frontend driver.
+
source "drivers/pci/controller/dwc/Kconfig"
endmenu
diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
index d56a507..77e0132 100644
--- a/drivers/pci/controller/Makefile
+++ b/drivers/pci/controller/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_PCIE_CADENCE_HOST) += pcie-cadence-host.o
obj-$(CONFIG_PCIE_CADENCE_EP) += pcie-cadence-ep.o
obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o
+obj-$(CONFIG_PCI_HYPERV_MINI) += pci-hyperv-mini.o
obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o
obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o
diff --git a/drivers/pci/controller/pci-hyperv-mini.c b/drivers/pci/controller/pci-hyperv-mini.c
new file mode 100644
index 0000000..9b6cd1c
--- /dev/null
+++ b/drivers/pci/controller/pci-hyperv-mini.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Author:
+ * Haiyang Zhang <[email protected]>
+ *
+ * This mini driver is a helper driver allows other drivers to
+ * have a common interface with the Hyper-V PCI frontend driver.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hyperv.h>
+
+struct hyperv_pci_block_ops hvpci_block_ops;
+EXPORT_SYMBOL(hvpci_block_ops);
+
+int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
+ unsigned int block_id, unsigned int *bytes_returned)
+{
+ if (!hvpci_block_ops.read_block)
+ return -EOPNOTSUPP;
+
+ return hvpci_block_ops.read_block(dev, buf, buf_len, block_id,
+ bytes_returned);
+}
+EXPORT_SYMBOL(hyperv_read_cfg_blk);
+
+int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
+ unsigned int block_id)
+{
+ if (!hvpci_block_ops.write_block)
+ return -EOPNOTSUPP;
+
+ return hvpci_block_ops.write_block(dev, buf, len, block_id);
+}
+EXPORT_SYMBOL(hyperv_write_cfg_blk);
+
+int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask))
+{
+ if (!hvpci_block_ops.reg_blk_invalidate)
+ return -EOPNOTSUPP;
+
+ return hvpci_block_ops.reg_blk_invalidate(dev, context,
+ block_invalidate);
+}
+EXPORT_SYMBOL(hyperv_reg_block_invalidate);
+
+static void __exit exit_hv_pci_mini(void)
+{
+ pr_info("unloaded\n");
+}
+
+static int __init init_hv_pci_mini(void)
+{
+ pr_info("loaded\n");
+
+ return 0;
+}
+
+module_init(init_hv_pci_mini);
+module_exit(exit_hv_pci_mini);
+
+MODULE_DESCRIPTION("Hyper-V PCI Mini");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 57adeca..9c93ac2 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -983,7 +983,6 @@ int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
*bytes_returned = comp_pkt.bytes_returned;
return 0;
}
-EXPORT_SYMBOL(hv_read_config_block);

/**
* hv_pci_write_config_compl() - Invoked when a response packet for a write
@@ -1070,7 +1069,6 @@ int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,

return 0;
}
-EXPORT_SYMBOL(hv_write_config_block);

/**
* hv_register_block_invalidate() - Invoked when a config block invalidation
@@ -1101,7 +1099,6 @@ int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
return 0;

}
-EXPORT_SYMBOL(hv_register_block_invalidate);

/* Interrupt management hooks */
static void hv_int_desc_free(struct hv_pci_dev *hpdev,
@@ -3045,10 +3042,19 @@ static int hv_pci_remove(struct hv_device *hdev)
static void __exit exit_hv_pci_drv(void)
{
vmbus_driver_unregister(&hv_pci_drv);
+
+ hvpci_block_ops.read_block = NULL;
+ hvpci_block_ops.write_block = NULL;
+ hvpci_block_ops.reg_blk_invalidate = NULL;
}

static int __init init_hv_pci_drv(void)
{
+ /* Initialize PCI block r/w interface */
+ hvpci_block_ops.read_block = hv_read_config_block;
+ hvpci_block_ops.write_block = hv_write_config_block;
+ hvpci_block_ops.reg_blk_invalidate = hv_register_block_invalidate;
+
return vmbus_driver_register(&hv_pci_drv);
}

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 9d37f8c..2afe6fd 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1579,18 +1579,32 @@ struct vmpacket_descriptor *
pkt = hv_pkt_iter_next(channel, pkt))

/*
- * Functions for passing data between SR-IOV PF and VF drivers. The VF driver
+ * Interface for passing data between SR-IOV PF and VF drivers. The VF driver
* sends requests to read and write blocks. Each block must be 128 bytes or
* smaller. Optionally, the VF driver can register a callback function which
* will be invoked when the host says that one or more of the first 64 block
* IDs is "invalid" which means that the VF driver should reread them.
*/
#define HV_CONFIG_BLOCK_SIZE_MAX 128
-int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
- unsigned int block_id, unsigned int *bytes_returned);
-int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
- unsigned int block_id);
-int hv_register_block_invalidate(struct pci_dev *dev, void *context,
- void (*block_invalidate)(void *context,
- u64 block_mask));
+
+int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
+ unsigned int block_id, unsigned int *bytes_returned);
+int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
+ unsigned int block_id);
+int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
+
+struct hyperv_pci_block_ops {
+ int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len,
+ unsigned int block_id, unsigned int *bytes_returned);
+ int (*write_block)(struct pci_dev *dev, void *buf, unsigned int len,
+ unsigned int block_id);
+ int (*reg_blk_invalidate)(struct pci_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
+};
+
+extern struct hyperv_pci_block_ops hvpci_block_ops;
+
#endif /* _HYPERV_H */
--
1.8.3.1

2019-08-14 20:47:00

by Mark Bloch

[permalink] [raw]
Subject: Re: [PATCH net-next, 5/6] net/mlx5: Add HV VHCA control agent



On 8/14/19 12:09 PM, Haiyang Zhang wrote:
> From: Eran Ben Elisha <[email protected]>
>
> Control agent is responsible over of the control block (ID 0). It should
> update the PF via this block about every capability change. In addition,
> upon block 0 invalidate, it should activate all other supported agents
> with data requests from the PF.
>
> Upon agent create/destroy, the invalidate callback of the control agent
> is being called in order to update the PF driver about this change.
>
> The control agent is an integral part of HV VHCA and will be created
> and destroy as part of the HV VHCA init/cleanup flow.
>
> Signed-off-by: Eran Ben Elisha <[email protected]>
> Signed-off-by: Saeed Mahameed <[email protected]>
> ---
> .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 122 ++++++++++++++++++++-
> .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 1 +
> 2 files changed, 121 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
> index b2eebdf..3c7fffa 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
> @@ -110,22 +110,131 @@ void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
> queue_work(hv_vhca->work_queue, &work->invalidate_work);
> }
>
> +#define AGENT_MASK(type) (type ? BIT(type - 1) : 0 /* control */)
> +
> +static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca,
> + struct mlx5_hv_vhca_control_block *block)
> +{
> + int i;
> +
> + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
> + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
> +
> + if (!agent || !agent->control)
> + continue;
> +
> + if (!(AGENT_MASK(agent->type) & block->control))
> + continue;
> +
> + agent->control(agent, block);
> + }
> +}
> +
> +static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca,
> + u32 *capabilities)
> +{
> + int i;
> +
> + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
> + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
> +
> + if (agent)
> + *capabilities |= AGENT_MASK(agent->type);
> + }
> +}
> +
> +static void
> +mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent,
> + u64 block_mask)
> +{
> + struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
> + struct mlx5_core_dev *dev = hv_vhca->dev;
> + struct mlx5_hv_vhca_control_block *block;
> + u32 capabilities = 0;
> + int err;
> +
> + block = kzalloc(sizeof(*block), GFP_KERNEL);
> + if (!block)
> + return;
> +
> + err = mlx5_hv_read_config(dev, block, sizeof(*block), 0);
> + if (err)
> + goto free_block;
> +
> + mlx5_hv_vhca_capabilities(hv_vhca, &capabilities);
> +
> + /* In case no capabilities, send empty block in return */
> + if (!capabilities) {
> + memset(block, 0, sizeof(*block));
> + goto write;
> + }
> +
> + if (block->capabilities != capabilities)
> + block->capabilities = capabilities;
> +
> + if (block->control & ~capabilities)
> + goto free_block;
> +
> + mlx5_hv_vhca_agents_control(hv_vhca, block);
> + block->command_ack = block->command;
> +
> +write:
> + mlx5_hv_write_config(dev, block, sizeof(*block), 0);
> +
> +free_block:
> + kfree(block);
> +}
> +
> +static struct mlx5_hv_vhca_agent *
> +mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca)
> +{
> + return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL,
> + NULL,
> + mlx5_hv_vhca_control_agent_invalidate,
> + NULL, NULL);
> +}
> +
> +static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent)
> +{
> + mlx5_hv_vhca_agent_destroy(agent);
> +}
> +
> int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
> {
> + struct mlx5_hv_vhca_agent *agent;
> + int err;
> +
> if (IS_ERR_OR_NULL(hv_vhca))
> return IS_ERR_OR_NULL(hv_vhca);
>
> - return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
> - mlx5_hv_vhca_invalidate);
> + err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
> + mlx5_hv_vhca_invalidate);
> + if (err)
> + return err;
> +
> + agent = mlx5_hv_vhca_control_agent_create(hv_vhca);
> + if (IS_ERR_OR_NULL(agent)) {
> + mlx5_hv_unregister_invalidate(hv_vhca->dev);
> + return IS_ERR_OR_NULL(agent);
> + }
> +
> + hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent;
> +
> + return 0;
> }
>
> void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
> {
> + struct mlx5_hv_vhca_agent *agent;
> int i;
>
> if (IS_ERR_OR_NULL(hv_vhca))
> return;
>
> + agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL];
> + if (!IS_ERR_OR_NULL(agent))
> + mlx5_hv_vhca_control_agent_destroy(agent);

Can the agent be err ptr here?

> +
> mutex_lock(&hv_vhca->agents_lock);
> for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
> WARN_ON(hv_vhca->agents[i]);

With the comment above in mind, here you check only for not null

> @@ -135,6 +244,11 @@ void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
> mlx5_hv_unregister_invalidate(hv_vhca->dev);
> }
>
> +static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca)
> +{
> + mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL));
> +}
> +
> struct mlx5_hv_vhca_agent *
> mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
> enum mlx5_hv_vhca_agent_type type,
> @@ -168,6 +282,8 @@ struct mlx5_hv_vhca_agent *
> hv_vhca->agents[type] = agent;
> mutex_unlock(&hv_vhca->agents_lock);
>
> + mlx5_hv_vhca_agents_update(hv_vhca);
> +
> return agent;
> }
>
> @@ -189,6 +305,8 @@ void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
> agent->cleanup(agent);
>
> kfree(agent);
> +
> + mlx5_hv_vhca_agents_update(hv_vhca);
> }
>
> static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
> index fa7ee85..6f4bfb1 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
> @@ -12,6 +12,7 @@
> struct mlx5_hv_vhca_control_block;
>
> enum mlx5_hv_vhca_agent_type {
> + MLX5_HV_VHCA_AGENT_CONTROL = 0,

No need to start value

> MLX5_HV_VHCA_AGENT_MAX = 32,
> };
>
>

Mark

2019-08-14 20:48:07

by Mark Bloch

[permalink] [raw]
Subject: Re: [PATCH net-next, 4/6] net/mlx5: Add HV VHCA infrastructure



On 8/14/19 12:08 PM, Haiyang Zhang wrote:
> From: Eran Ben Elisha <[email protected]>
>
> HV VHCA is a layer which provides PF to VF communication channel based on
> HyperV PCI config channel. It implements Mellanox's Inter VHCA control
> communication protocol. The protocol contains control block in order to
> pass messages between the PF and VF drivers, and data blocks in order to
> pass actual data.
>
> The infrastructure is agent based. Each agent will be responsible of
> contiguous buffer blocks in the VHCA config space. This infrastructure will
> bind agents to their blocks, and those agents can only access read/write
> the buffer blocks assigned to them. Each agent will provide three
> callbacks (control, invalidate, cleanup). Control will be invoked when
> block-0 is invalidated with a command that concerns this agent. Invalidate
> callback will be invoked if one of the blocks assigned to this agent was
> invalidated. Cleanup will be invoked before the agent is being freed in
> order to clean all of its open resources or deferred works.
>
> Block-0 serves as the control block. All execution commands from the PF
> will be written by the PF over this block. VF will ack on those by
> writing on block-0 as well. Its format is described by struct
> mlx5_hv_vhca_control_block layout.
>
> Signed-off-by: Eran Ben Elisha <[email protected]>
> Signed-off-by: Saeed Mahameed <[email protected]>
> ---
> drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +-
> .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 247 +++++++++++++++++++++
> .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 102 +++++++++
> drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +
> include/linux/mlx5/driver.h | 2 +
> 5 files changed, 359 insertions(+), 1 deletion(-)
> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> index a8950b1..e0a1056 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> @@ -45,7 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo
> mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
> mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
> mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
> -mlx5_core-$(CONFIG_PCI_HYPERV_MINI) += lib/hv.o
> +mlx5_core-$(CONFIG_PCI_HYPERV_MINI)+= lib/hv.o lib/hv_vhca.o
>
> #
> # Ipoib netdev
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
> new file mode 100644
> index 0000000..b2eebdf
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
> @@ -0,0 +1,247 @@
> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> +// Copyright (c) 2018 Mellanox Technologies
> +
> +#include <linux/hyperv.h>
> +#include "mlx5_core.h"
> +#include "lib/hv.h"
> +#include "lib/hv_vhca.h"
> +
> +struct mlx5_hv_vhca {
> + struct mlx5_core_dev *dev;
> + struct workqueue_struct *work_queue;
> + struct mlx5_hv_vhca_agent *agents[MLX5_HV_VHCA_AGENT_MAX];
> + struct mutex agents_lock; /* Protect agents array */
> +};
> +
> +struct mlx5_hv_vhca_work {
> + struct work_struct invalidate_work;
> + struct mlx5_hv_vhca *hv_vhca;
> + u64 block_mask;
> +};
> +
> +struct mlx5_hv_vhca_data_block {
> + u16 sequence;
> + u16 offset;
> + u8 reserved[4];
> + u64 data[15];
> +};
> +
> +struct mlx5_hv_vhca_agent {
> + enum mlx5_hv_vhca_agent_type type;
> + struct mlx5_hv_vhca *hv_vhca;
> + void *priv;
> + int seq;
Why is this int? and in data block is u16?

> + void (*control)(struct mlx5_hv_vhca_agent *agent,
> + struct mlx5_hv_vhca_control_block *block);
> + void (*invalidate)(struct mlx5_hv_vhca_agent *agent,
> + u64 block_mask);
> + void (*cleanup)(struct mlx5_hv_vhca_agent *agent);
> +};
> +
> +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
> +{
> + struct mlx5_hv_vhca *hv_vhca = NULL;
> +
> + hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
> + if (!hv_vhca)
> + return ERR_PTR(-ENOMEM);
> +
> + hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca");
> + if (!hv_vhca->work_queue) {
> + kfree(hv_vhca);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + hv_vhca->dev = dev;
> + mutex_init(&hv_vhca->agents_lock);
> +
> + return hv_vhca;
> +}
> +
> +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
> +{
> + if (IS_ERR_OR_NULL(hv_vhca))
> + return;
> +
> + flush_workqueue(hv_vhca->work_queue);
> + destroy_workqueue(hv_vhca->work_queue);

Why not just destroy?

> + kfree(hv_vhca);
> +}
> +
> +static void mlx5_hv_vhca_invalidate_work(struct work_struct *work)
> +{
> + struct mlx5_hv_vhca_work *hwork;
> + struct mlx5_hv_vhca *hv_vhca;
> + int i;
> +
> + hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work);
> + hv_vhca = hwork->hv_vhca;
> +
> + mutex_lock(&hv_vhca->agents_lock);
> + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
> + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
> +
> + if (!agent || !agent->invalidate)
> + continue;
> +
> + if (!(BIT(agent->type) & hwork->block_mask))
> + continue;
> +
> + agent->invalidate(agent, hwork->block_mask);
> + }
> + mutex_unlock(&hv_vhca->agents_lock);
> +
> + kfree(hwork);
> +}
> +
> +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
> +{
> + struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context;
> + struct mlx5_hv_vhca_work *work;
> +
> + work = kzalloc(sizeof(*work), GFP_ATOMIC);
> + if (!work)
> + return;
> +
> + INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work);
> + work->hv_vhca = hv_vhca;
> + work->block_mask = block_mask;
> +
> + queue_work(hv_vhca->work_queue, &work->invalidate_work);
> +}
> +
> +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
> +{
> + if (IS_ERR_OR_NULL(hv_vhca))
> + return IS_ERR_OR_NULL(hv_vhca);
> +
> + return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
> + mlx5_hv_vhca_invalidate);
> +}
> +
> +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
> +{
> + int i;
> +
> + if (IS_ERR_OR_NULL(hv_vhca))
> + return;
> +
> + mutex_lock(&hv_vhca->agents_lock);
> + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
> + WARN_ON(hv_vhca->agents[i]);
> +
> + mutex_unlock(&hv_vhca->agents_lock);
> +
> + mlx5_hv_unregister_invalidate(hv_vhca->dev);
> +}
> +
> +struct mlx5_hv_vhca_agent *
> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
> + enum mlx5_hv_vhca_agent_type type,
> + void (*control)(struct mlx5_hv_vhca_agent*,
> + struct mlx5_hv_vhca_control_block *block),
> + void (*invalidate)(struct mlx5_hv_vhca_agent*,
> + u64 block_mask),
> + void (*cleaup)(struct mlx5_hv_vhca_agent *agent),
> + void *priv)
> +{
> + struct mlx5_hv_vhca_agent *agent;
> +
> + if (IS_ERR_OR_NULL(hv_vhca))
> + return ERR_PTR(-ENOMEM);
> +
> + if (hv_vhca->agents[type])
> + return ERR_PTR(-EINVAL);> +
> + agent = kzalloc(sizeof(*agent), GFP_KERNEL);
> + if (!agent)
> + return ERR_PTR(-ENOMEM);
> +
> + agent->type = type;
> + agent->hv_vhca = hv_vhca;
> + agent->priv = priv;
> + agent->control = control;
> + agent->invalidate = invalidate;
> + agent->cleanup = cleaup;
> +
> + mutex_lock(&hv_vhca->agents_lock);
> + hv_vhca->agents[type] = agent;
> + mutex_unlock(&hv_vhca->agents_lock);

You have a check for this not under a lock a few lines up,
but assign under a lock?

Mark

> +
> + return agent;
> +}
> +
> +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
> +{
> + struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
> +
> + mutex_lock(&hv_vhca->agents_lock);
> +
> + if (WARN_ON(agent != hv_vhca->agents[agent->type])) {
> + mutex_unlock(&hv_vhca->agents_lock);
> + return;
> + }
> +
> + hv_vhca->agents[agent->type] = NULL;
> + mutex_unlock(&hv_vhca->agents_lock);
> +
> + if (agent->cleanup)
> + agent->cleanup(agent);
> +
> + kfree(agent);
> +}
> +
> +static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
> + struct mlx5_hv_vhca_data_block *data_block,
> + void *src, int len, int *offset)
> +{
> + int bytes = min_t(int, (int)sizeof(data_block->data), len);
> +
> + data_block->sequence = agent->seq;
> + data_block->offset = (*offset)++;
> + memcpy(data_block->data, src, bytes);
> +
> + return bytes;
> +}
> +
> +static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent)
> +{
> + agent->seq++;
> +}
> +
> +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
> + void *buf, int len)
> +{
> + int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX;
> + int block_offset = 0;
> + int total = 0;
> + int err;
> +
> + while (len) {
> + struct mlx5_hv_vhca_data_block data_block = {0};
> + int bytes;
> +
> + bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block,
> + buf + total,
> + len, &block_offset);
> + if (!bytes)
> + return -ENOMEM;
> +
> + err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block,
> + sizeof(data_block), offset);
> + if (err)
> + return err;
> +
> + total += bytes;
> + len -= bytes;
> + }
> +
> + mlx5_hv_vhca_agent_seq_update(agent);
> +
> + return 0;
> +}
> +
> +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent)
> +{
> + return agent->priv;
> +}
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
> new file mode 100644
> index 0000000..fa7ee85
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
> @@ -0,0 +1,102 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
> +/* Copyright (c) 2019 Mellanox Technologies. */
> +
> +#ifndef __LIB_HV_VHCA_H__
> +#define __LIB_HV_VHCA_H__
> +
> +#include "en.h"
> +#include "lib/hv.h"
> +
> +struct mlx5_hv_vhca_agent;
> +struct mlx5_hv_vhca;
> +struct mlx5_hv_vhca_control_block;
> +
> +enum mlx5_hv_vhca_agent_type {
> + MLX5_HV_VHCA_AGENT_MAX = 32,
> +};
> +
> +#if IS_ENABLED(CONFIG_PCI_HYPERV_MINI)
> +
> +struct mlx5_hv_vhca_control_block {
> + u32 capabilities;
> + u32 control;
> + u16 command;
> + u16 command_ack;
> + u16 version;
> + u16 rings;
> + u32 reserved1[28];
> +};
> +
> +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev);
> +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca);
> +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca);
> +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca);
> +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask);
> +
> +struct mlx5_hv_vhca_agent *
> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
> + enum mlx5_hv_vhca_agent_type type,
> + void (*control)(struct mlx5_hv_vhca_agent*,
> + struct mlx5_hv_vhca_control_block *block),
> + void (*invalidate)(struct mlx5_hv_vhca_agent*,
> + u64 block_mask),
> + void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
> + void *context);
> +
> +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
> +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
> + void *buf, int len);
> +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent);
> +
> +#else
> +
> +static inline struct mlx5_hv_vhca *
> +mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
> +{
> + return NULL;
> +}
> +
> +static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
> +{
> +}
> +
> +static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
> +{
> + return 0;
> +}
> +
> +static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
> +{
> +}
> +
> +static inline void mlx5_hv_vhca_invalidate(void *context,
> + u64 block_mask)
> +{
> +}
> +
> +static inline struct mlx5_hv_vhca_agent *
> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
> + enum mlx5_hv_vhca_agent_type type,
> + void (*control)(struct mlx5_hv_vhca_agent*,
> + struct mlx5_hv_vhca_control_block *block),
> + void (*invalidate)(struct mlx5_hv_vhca_agent*,
> + u64 block_mask),
> + void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
> + void *context)
> +{
> + return NULL;
> +}
> +
> +static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
> +{
> +}
> +
> +static inline int
> +mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent,
> + void *buf, int len)
> +{
> + return 0;
> +}
> +#endif
> +
> +#endif /* __LIB_HV_VHCA_H__ */
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> index 4cc90eb..50ee38b 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> @@ -69,6 +69,7 @@
> #include "lib/pci_vsc.h"
> #include "diag/fw_tracer.h"
> #include "ecpf.h"
> +#include "lib/hv_vhca.h"
>
> MODULE_AUTHOR("Eli Cohen <[email protected]>");
> MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
> @@ -872,6 +873,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
> }
>
> dev->tracer = mlx5_fw_tracer_create(dev);
> + dev->hv_vhca = mlx5_hv_vhca_create(dev);
>
> return 0;
>
> @@ -902,6 +904,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
>
> static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
> {
> + mlx5_hv_vhca_destroy(dev->hv_vhca);
> mlx5_fw_tracer_destroy(dev->tracer);
> mlx5_fpga_cleanup(dev);
> mlx5_eswitch_cleanup(dev->priv.eswitch);
> @@ -1068,6 +1071,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
> goto err_fw_tracer;
> }
>
> + mlx5_hv_vhca_init(dev->hv_vhca);
> +
> err = mlx5_fpga_device_start(dev);
> if (err) {
> mlx5_core_err(dev, "fpga device start failed %d\n", err);
> @@ -1123,6 +1128,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
> err_ipsec_start:
> mlx5_fpga_device_stop(dev);
> err_fpga_start:
> + mlx5_hv_vhca_cleanup(dev->hv_vhca);
> mlx5_fw_tracer_cleanup(dev->tracer);
> err_fw_tracer:
> mlx5_eq_table_destroy(dev);
> @@ -1143,6 +1149,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
> mlx5_accel_ipsec_cleanup(dev);
> mlx5_accel_tls_cleanup(dev);
> mlx5_fpga_device_stop(dev);
> + mlx5_hv_vhca_cleanup(dev->hv_vhca);
> mlx5_fw_tracer_cleanup(dev->tracer);
> mlx5_eq_table_destroy(dev);
> mlx5_irq_table_destroy(dev);
> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
> index 2b84ee9..97bb98c 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -646,6 +646,7 @@ struct mlx5_clock {
> struct mlx5_fw_tracer;
> struct mlx5_vxlan;
> struct mlx5_geneve;
> +struct mlx5_hv_vhca;
>
> struct mlx5_core_dev {
> struct device *device;
> @@ -693,6 +694,7 @@ struct mlx5_core_dev {
> struct mlx5_ib_clock_info *clock_info;
> struct mlx5_fw_tracer *tracer;
> u32 vsc_addr;
> + struct mlx5_hv_vhca *hv_vhca;
> };
>
> struct mlx5_db {
>

2019-08-14 21:34:02

by Mark Bloch

[permalink] [raw]
Subject: Re: [PATCH net-next, 3/6] net/mlx5: Add wrappers for HyperV PCIe operations



On 8/14/19 12:08 PM, Haiyang Zhang wrote:
> From: Eran Ben Elisha <[email protected]>
>
> Add wrapper functions for HyperV PCIe read / write /
> block_invalidate_register operations. This will be used as an
> infrastructure in the downstream patch for software communication.
>
> This will be enabled by default if CONFIG_PCI_HYPERV_MINI is set.
>
> Signed-off-by: Eran Ben Elisha <[email protected]>
> Signed-off-by: Saeed Mahameed <[email protected]>
> ---
> drivers/net/ethernet/mellanox/mlx5/core/Makefile | 1 +
> drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 ++++++++++++++++++++++++
> drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++++++++
> 3 files changed, 87 insertions(+)
> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> index 8b7edaa..a8950b1 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> @@ -45,6 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo
> mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
> mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
> mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
> +mlx5_core-$(CONFIG_PCI_HYPERV_MINI) += lib/hv.o
>
> #
> # Ipoib netdev
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
> new file mode 100644
> index 0000000..cf08d02
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
> @@ -0,0 +1,64 @@
> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> +// Copyright (c) 2018 Mellanox Technologies
> +
> +#include <linux/hyperv.h>
> +#include "mlx5_core.h"
> +#include "lib/hv.h"
> +
> +static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
> + int offset, bool read)
> +{
> + int rc = -EOPNOTSUPP;
> + int bytes_returned;
> + int block_id;
> +
> + if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len % HV_CONFIG_BLOCK_SIZE_MAX)
> + return -EINVAL;
> +
> + block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
> +
> + rc = read ?
> + hyperv_read_cfg_blk(dev->pdev, buf,
> + HV_CONFIG_BLOCK_SIZE_MAX, block_id,
> + &bytes_returned) :
> + hyperv_write_cfg_blk(dev->pdev, buf,
> + HV_CONFIG_BLOCK_SIZE_MAX, block_id);
> +
> + /* Make sure len bytes were read successfully */
> + if (read)
> + rc |= !(len == bytes_returned);
> +
> + if (rc) {
> + mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
> + read ? "read" : "write", rc, len,
> + offset);
> + return rc;
> + }
> +
> + return 0;
> +}

This seems out of place why not expose this function as part of hyperv and mlx5
will just pass the pdev.

> +
> +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
> + int offset)
> +{
> + return mlx5_hv_config_common(dev, buf, len, offset, true);
> +}
> +
> +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
> + int offset)
> +{
> + return mlx5_hv_config_common(dev, buf, len, offset, false);
> +}
> +
> +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
> + void (*block_invalidate)(void *context,
> + u64 block_mask))
> +{
> + return hyperv_reg_block_invalidate(dev->pdev, context,
> + block_invalidate);
> +}
> +
> +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
> +{
> + hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
> +}
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
> new file mode 100644
> index 0000000..7f69771
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
> +/* Copyright (c) 2019 Mellanox Technologies. */
> +
> +#ifndef __LIB_HV_H__
> +#define __LIB_HV_H__
> +
> +#if IS_ENABLED(CONFIG_PCI_HYPERV_MINI)
> +
> +#include <linux/hyperv.h>
> +#include <linux/mlx5/driver.h>
> +
> +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
> + int offset);
> +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
> + int offset);
> +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
> + void (*block_invalidate)(void *context,
> + u64 block_mask));
> +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
> +#endif
> +
> +#endif /* __LIB_HV_H__ */
>

Mark

2019-08-15 11:49:25

by Eran Ben Elisha

[permalink] [raw]
Subject: Re: [PATCH net-next, 5/6] net/mlx5: Add HV VHCA control agent



On 8/14/2019 11:41 PM, Mark Bloch wrote:
>
>
> On 8/14/19 12:09 PM, Haiyang Zhang wrote:
>> From: Eran Ben Elisha <[email protected]>
>>
>> Control agent is responsible over of the control block (ID 0). It should
>> update the PF via this block about every capability change. In addition,
>> upon block 0 invalidate, it should activate all other supported agents
>> with data requests from the PF.
>>
>> Upon agent create/destroy, the invalidate callback of the control agent
>> is being called in order to update the PF driver about this change.
>>
>> The control agent is an integral part of HV VHCA and will be created
>> and destroy as part of the HV VHCA init/cleanup flow.
>>
>> Signed-off-by: Eran Ben Elisha <[email protected]>
>> Signed-off-by: Saeed Mahameed <[email protected]>
>> ---
>> .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 122 ++++++++++++++++++++-
>> .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 1 +
>> 2 files changed, 121 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>> index b2eebdf..3c7fffa 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>> @@ -110,22 +110,131 @@ void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
>> queue_work(hv_vhca->work_queue, &work->invalidate_work);
>> }
>>
>> +#define AGENT_MASK(type) (type ? BIT(type - 1) : 0 /* control */)
>> +
>> +static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca,
>> + struct mlx5_hv_vhca_control_block *block)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
>> + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
>> +
>> + if (!agent || !agent->control)
>> + continue;
>> +
>> + if (!(AGENT_MASK(agent->type) & block->control))
>> + continue;
>> +
>> + agent->control(agent, block);
>> + }
>> +}
>> +
>> +static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca,
>> + u32 *capabilities)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
>> + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
>> +
>> + if (agent)
>> + *capabilities |= AGENT_MASK(agent->type);
>> + }
>> +}
>> +
>> +static void
>> +mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent,
>> + u64 block_mask)
>> +{
>> + struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
>> + struct mlx5_core_dev *dev = hv_vhca->dev;
>> + struct mlx5_hv_vhca_control_block *block;
>> + u32 capabilities = 0;
>> + int err;
>> +
>> + block = kzalloc(sizeof(*block), GFP_KERNEL);
>> + if (!block)
>> + return;
>> +
>> + err = mlx5_hv_read_config(dev, block, sizeof(*block), 0);
>> + if (err)
>> + goto free_block;
>> +
>> + mlx5_hv_vhca_capabilities(hv_vhca, &capabilities);
>> +
>> + /* In case no capabilities, send empty block in return */
>> + if (!capabilities) {
>> + memset(block, 0, sizeof(*block));
>> + goto write;
>> + }
>> +
>> + if (block->capabilities != capabilities)
>> + block->capabilities = capabilities;
>> +
>> + if (block->control & ~capabilities)
>> + goto free_block;
>> +
>> + mlx5_hv_vhca_agents_control(hv_vhca, block);
>> + block->command_ack = block->command;
>> +
>> +write:
>> + mlx5_hv_write_config(dev, block, sizeof(*block), 0);
>> +
>> +free_block:
>> + kfree(block);
>> +}
>> +
>> +static struct mlx5_hv_vhca_agent *
>> +mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> + return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL,
>> + NULL,
>> + mlx5_hv_vhca_control_agent_invalidate,
>> + NULL, NULL);
>> +}
>> +
>> +static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent)
>> +{
>> + mlx5_hv_vhca_agent_destroy(agent);
>> +}
>> +
>> int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
>> {
>> + struct mlx5_hv_vhca_agent *agent;
>> + int err;
>> +
>> if (IS_ERR_OR_NULL(hv_vhca))
>> return IS_ERR_OR_NULL(hv_vhca);
>>
>> - return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
>> - mlx5_hv_vhca_invalidate);
>> + err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
>> + mlx5_hv_vhca_invalidate);
>> + if (err)
>> + return err;
>> +
>> + agent = mlx5_hv_vhca_control_agent_create(hv_vhca);
>> + if (IS_ERR_OR_NULL(agent)) {
>> + mlx5_hv_unregister_invalidate(hv_vhca->dev);
>> + return IS_ERR_OR_NULL(agent);
>> + }
>> +
>> + hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent;
>> +
>> + return 0;
>> }
>>
>> void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
>> {
>> + struct mlx5_hv_vhca_agent *agent;
>> int i;
>>
>> if (IS_ERR_OR_NULL(hv_vhca))
>> return;
>>
>> + agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL];
>> + if (!IS_ERR_OR_NULL(agent))
>> + mlx5_hv_vhca_control_agent_destroy(agent);
>
> Can the agent be err ptr here?

Only NULL, will fix.

>
>> +
>> mutex_lock(&hv_vhca->agents_lock);
>> for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
>> WARN_ON(hv_vhca->agents[i]);
>
> With the comment above in mind, here you check only for not null

Comment above was right... after fixing it, all is aligned here.

>
>> @@ -135,6 +244,11 @@ void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
>> mlx5_hv_unregister_invalidate(hv_vhca->dev);
>> }
>>
>> +static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> + mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL));
>> +}
>> +
>> struct mlx5_hv_vhca_agent *
>> mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
>> enum mlx5_hv_vhca_agent_type type,
>> @@ -168,6 +282,8 @@ struct mlx5_hv_vhca_agent *
>> hv_vhca->agents[type] = agent;
>> mutex_unlock(&hv_vhca->agents_lock);
>>
>> + mlx5_hv_vhca_agents_update(hv_vhca);
>> +
>> return agent;
>> }
>>
>> @@ -189,6 +305,8 @@ void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
>> agent->cleanup(agent);
>>
>> kfree(agent);
>> +
>> + mlx5_hv_vhca_agents_update(hv_vhca);
>> }
>>
>> static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>> index fa7ee85..6f4bfb1 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>> @@ -12,6 +12,7 @@
>> struct mlx5_hv_vhca_control_block;
>>
>> enum mlx5_hv_vhca_agent_type {
>> + MLX5_HV_VHCA_AGENT_CONTROL = 0,
>
> No need to start value

I find it more easy to read when having the value explicitly.
If you or Saeed has a strong opinion against it, this can be easily fixed.

>
>> MLX5_HV_VHCA_AGENT_MAX = 32,
>> };
>>
>>
>
> Mark
>

2019-08-15 11:50:28

by Eran Ben Elisha

[permalink] [raw]
Subject: Re: [PATCH net-next, 4/6] net/mlx5: Add HV VHCA infrastructure



On 8/14/2019 11:41 PM, Mark Bloch wrote:
>
>
> On 8/14/19 12:08 PM, Haiyang Zhang wrote:
>> From: Eran Ben Elisha <[email protected]>
>>
>> HV VHCA is a layer which provides PF to VF communication channel based on
>> HyperV PCI config channel. It implements Mellanox's Inter VHCA control
>> communication protocol. The protocol contains control block in order to
>> pass messages between the PF and VF drivers, and data blocks in order to
>> pass actual data.
>>
>> The infrastructure is agent based. Each agent will be responsible of
>> contiguous buffer blocks in the VHCA config space. This infrastructure will
>> bind agents to their blocks, and those agents can only access read/write
>> the buffer blocks assigned to them. Each agent will provide three
>> callbacks (control, invalidate, cleanup). Control will be invoked when
>> block-0 is invalidated with a command that concerns this agent. Invalidate
>> callback will be invoked if one of the blocks assigned to this agent was
>> invalidated. Cleanup will be invoked before the agent is being freed in
>> order to clean all of its open resources or deferred works.
>>
>> Block-0 serves as the control block. All execution commands from the PF
>> will be written by the PF over this block. VF will ack on those by
>> writing on block-0 as well. Its format is described by struct
>> mlx5_hv_vhca_control_block layout.
>>
>> Signed-off-by: Eran Ben Elisha <[email protected]>
>> Signed-off-by: Saeed Mahameed <[email protected]>
>> ---
>> drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +-
>> .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 247 +++++++++++++++++++++
>> .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 102 +++++++++
>> drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +
>> include/linux/mlx5/driver.h | 2 +
>> 5 files changed, 359 insertions(+), 1 deletion(-)
>> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> index a8950b1..e0a1056 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> @@ -45,7 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo
>> mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
>> mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
>> mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
>> -mlx5_core-$(CONFIG_PCI_HYPERV_MINI) += lib/hv.o
>> +mlx5_core-$(CONFIG_PCI_HYPERV_MINI)+= lib/hv.o lib/hv_vhca.o
>>
>> #
>> # Ipoib netdev
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>> new file mode 100644
>> index 0000000..b2eebdf
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>> @@ -0,0 +1,247 @@
>> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>> +// Copyright (c) 2018 Mellanox Technologies
>> +
>> +#include <linux/hyperv.h>
>> +#include "mlx5_core.h"
>> +#include "lib/hv.h"
>> +#include "lib/hv_vhca.h"
>> +
>> +struct mlx5_hv_vhca {
>> + struct mlx5_core_dev *dev;
>> + struct workqueue_struct *work_queue;
>> + struct mlx5_hv_vhca_agent *agents[MLX5_HV_VHCA_AGENT_MAX];
>> + struct mutex agents_lock; /* Protect agents array */
>> +};
>> +
>> +struct mlx5_hv_vhca_work {
>> + struct work_struct invalidate_work;
>> + struct mlx5_hv_vhca *hv_vhca;
>> + u64 block_mask;
>> +};
>> +
>> +struct mlx5_hv_vhca_data_block {
>> + u16 sequence;
>> + u16 offset;
>> + u8 reserved[4];
>> + u64 data[15];
>> +};
>> +
>> +struct mlx5_hv_vhca_agent {
>> + enum mlx5_hv_vhca_agent_type type;
>> + struct mlx5_hv_vhca *hv_vhca;
>> + void *priv;
>> + int seq;
> Why is this int? and in data block is u16?

No good reason. Should be changed to u16.

>
>> + void (*control)(struct mlx5_hv_vhca_agent *agent,
>> + struct mlx5_hv_vhca_control_block *block);
>> + void (*invalidate)(struct mlx5_hv_vhca_agent *agent,
>> + u64 block_mask);
>> + void (*cleanup)(struct mlx5_hv_vhca_agent *agent);
>> +};
>> +
>> +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
>> +{
>> + struct mlx5_hv_vhca *hv_vhca = NULL;
>> +
>> + hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
>> + if (!hv_vhca)
>> + return ERR_PTR(-ENOMEM);
>> +
>> + hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca");
>> + if (!hv_vhca->work_queue) {
>> + kfree(hv_vhca);
>> + return ERR_PTR(-ENOMEM);
>> + }
>> +
>> + hv_vhca->dev = dev;
>> + mutex_init(&hv_vhca->agents_lock);
>> +
>> + return hv_vhca;
>> +}
>> +
>> +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> + if (IS_ERR_OR_NULL(hv_vhca))
>> + return;
>> +
>> + flush_workqueue(hv_vhca->work_queue);
>> + destroy_workqueue(hv_vhca->work_queue);
>
> Why not just destroy?

Will fix.

>
>> + kfree(hv_vhca);
>> +}
>> +
>> +static void mlx5_hv_vhca_invalidate_work(struct work_struct *work)
>> +{
>> + struct mlx5_hv_vhca_work *hwork;
>> + struct mlx5_hv_vhca *hv_vhca;
>> + int i;
>> +
>> + hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work);
>> + hv_vhca = hwork->hv_vhca;
>> +
>> + mutex_lock(&hv_vhca->agents_lock);
>> + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
>> + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
>> +
>> + if (!agent || !agent->invalidate)
>> + continue;
>> +
>> + if (!(BIT(agent->type) & hwork->block_mask))
>> + continue;
>> +
>> + agent->invalidate(agent, hwork->block_mask);
>> + }
>> + mutex_unlock(&hv_vhca->agents_lock);
>> +
>> + kfree(hwork);
>> +}
>> +
>> +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
>> +{
>> + struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context;
>> + struct mlx5_hv_vhca_work *work;
>> +
>> + work = kzalloc(sizeof(*work), GFP_ATOMIC);
>> + if (!work)
>> + return;
>> +
>> + INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work);
>> + work->hv_vhca = hv_vhca;
>> + work->block_mask = block_mask;
>> +
>> + queue_work(hv_vhca->work_queue, &work->invalidate_work);
>> +}
>> +
>> +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> + if (IS_ERR_OR_NULL(hv_vhca))
>> + return IS_ERR_OR_NULL(hv_vhca);
>> +
>> + return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
>> + mlx5_hv_vhca_invalidate);
>> +}
>> +
>> +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> + int i;
>> +
>> + if (IS_ERR_OR_NULL(hv_vhca))
>> + return;
>> +
>> + mutex_lock(&hv_vhca->agents_lock);
>> + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
>> + WARN_ON(hv_vhca->agents[i]);
>> +
>> + mutex_unlock(&hv_vhca->agents_lock);
>> +
>> + mlx5_hv_unregister_invalidate(hv_vhca->dev);
>> +}
>> +
>> +struct mlx5_hv_vhca_agent *
>> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
>> + enum mlx5_hv_vhca_agent_type type,
>> + void (*control)(struct mlx5_hv_vhca_agent*,
>> + struct mlx5_hv_vhca_control_block *block),
>> + void (*invalidate)(struct mlx5_hv_vhca_agent*,
>> + u64 block_mask),
>> + void (*cleaup)(struct mlx5_hv_vhca_agent *agent),
>> + void *priv)
>> +{
>> + struct mlx5_hv_vhca_agent *agent;
>> +
>> + if (IS_ERR_OR_NULL(hv_vhca))
>> + return ERR_PTR(-ENOMEM);
>> +
>> + if (hv_vhca->agents[type])
>> + return ERR_PTR(-EINVAL);> +
>> + agent = kzalloc(sizeof(*agent), GFP_KERNEL);
>> + if (!agent)
>> + return ERR_PTR(-ENOMEM);
>> +
>> + agent->type = type;
>> + agent->hv_vhca = hv_vhca;
>> + agent->priv = priv;
>> + agent->control = control;
>> + agent->invalidate = invalidate;
>> + agent->cleanup = cleaup;
>> +
>> + mutex_lock(&hv_vhca->agents_lock);
>> + hv_vhca->agents[type] = agent;
>> + mutex_unlock(&hv_vhca->agents_lock);
>
> You have a check for this not under a lock a few lines up,
> but assign under a lock?

good point, will add.

>
> Mark
>
>> +
>> + return agent;
>> +}
>> +
>> +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
>> +{
>> + struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
>> +
>> + mutex_lock(&hv_vhca->agents_lock);
>> +
>> + if (WARN_ON(agent != hv_vhca->agents[agent->type])) {
>> + mutex_unlock(&hv_vhca->agents_lock);
>> + return;
>> + }
>> +
>> + hv_vhca->agents[agent->type] = NULL;
>> + mutex_unlock(&hv_vhca->agents_lock);
>> +
>> + if (agent->cleanup)
>> + agent->cleanup(agent);
>> +
>> + kfree(agent);
>> +}
>> +
>> +static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
>> + struct mlx5_hv_vhca_data_block *data_block,
>> + void *src, int len, int *offset)
>> +{
>> + int bytes = min_t(int, (int)sizeof(data_block->data), len);
>> +
>> + data_block->sequence = agent->seq;
>> + data_block->offset = (*offset)++;
>> + memcpy(data_block->data, src, bytes);
>> +
>> + return bytes;
>> +}
>> +
>> +static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent)
>> +{
>> + agent->seq++;
>> +}
>> +
>> +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
>> + void *buf, int len)
>> +{
>> + int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX;
>> + int block_offset = 0;
>> + int total = 0;
>> + int err;
>> +
>> + while (len) {
>> + struct mlx5_hv_vhca_data_block data_block = {0};
>> + int bytes;
>> +
>> + bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block,
>> + buf + total,
>> + len, &block_offset);
>> + if (!bytes)
>> + return -ENOMEM;
>> +
>> + err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block,
>> + sizeof(data_block), offset);
>> + if (err)
>> + return err;
>> +
>> + total += bytes;
>> + len -= bytes;
>> + }
>> +
>> + mlx5_hv_vhca_agent_seq_update(agent);
>> +
>> + return 0;
>> +}
>> +
>> +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent)
>> +{
>> + return agent->priv;
>> +}
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>> new file mode 100644
>> index 0000000..fa7ee85
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>> @@ -0,0 +1,102 @@
>> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
>> +/* Copyright (c) 2019 Mellanox Technologies. */
>> +
>> +#ifndef __LIB_HV_VHCA_H__
>> +#define __LIB_HV_VHCA_H__
>> +
>> +#include "en.h"
>> +#include "lib/hv.h"
>> +
>> +struct mlx5_hv_vhca_agent;
>> +struct mlx5_hv_vhca;
>> +struct mlx5_hv_vhca_control_block;
>> +
>> +enum mlx5_hv_vhca_agent_type {
>> + MLX5_HV_VHCA_AGENT_MAX = 32,
>> +};
>> +
>> +#if IS_ENABLED(CONFIG_PCI_HYPERV_MINI)
>> +
>> +struct mlx5_hv_vhca_control_block {
>> + u32 capabilities;
>> + u32 control;
>> + u16 command;
>> + u16 command_ack;
>> + u16 version;
>> + u16 rings;
>> + u32 reserved1[28];
>> +};
>> +
>> +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev);
>> +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca);
>> +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca);
>> +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca);
>> +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask);
>> +
>> +struct mlx5_hv_vhca_agent *
>> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
>> + enum mlx5_hv_vhca_agent_type type,
>> + void (*control)(struct mlx5_hv_vhca_agent*,
>> + struct mlx5_hv_vhca_control_block *block),
>> + void (*invalidate)(struct mlx5_hv_vhca_agent*,
>> + u64 block_mask),
>> + void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
>> + void *context);
>> +
>> +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
>> +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
>> + void *buf, int len);
>> +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent);
>> +
>> +#else
>> +
>> +static inline struct mlx5_hv_vhca *
>> +mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
>> +{
>> + return NULL;
>> +}
>> +
>> +static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +}
>> +
>> +static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> + return 0;
>> +}
>> +
>> +static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +}
>> +
>> +static inline void mlx5_hv_vhca_invalidate(void *context,
>> + u64 block_mask)
>> +{
>> +}
>> +
>> +static inline struct mlx5_hv_vhca_agent *
>> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
>> + enum mlx5_hv_vhca_agent_type type,
>> + void (*control)(struct mlx5_hv_vhca_agent*,
>> + struct mlx5_hv_vhca_control_block *block),
>> + void (*invalidate)(struct mlx5_hv_vhca_agent*,
>> + u64 block_mask),
>> + void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
>> + void *context)
>> +{
>> + return NULL;
>> +}
>> +
>> +static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
>> +{
>> +}
>> +
>> +static inline int
>> +mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent,
>> + void *buf, int len)
>> +{
>> + return 0;
>> +}
>> +#endif
>> +
>> +#endif /* __LIB_HV_VHCA_H__ */
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
>> index 4cc90eb..50ee38b 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
>> @@ -69,6 +69,7 @@
>> #include "lib/pci_vsc.h"
>> #include "diag/fw_tracer.h"
>> #include "ecpf.h"
>> +#include "lib/hv_vhca.h"
>>
>> MODULE_AUTHOR("Eli Cohen <[email protected]>");
>> MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
>> @@ -872,6 +873,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
>> }
>>
>> dev->tracer = mlx5_fw_tracer_create(dev);
>> + dev->hv_vhca = mlx5_hv_vhca_create(dev);
>>
>> return 0;
>>
>> @@ -902,6 +904,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
>>
>> static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
>> {
>> + mlx5_hv_vhca_destroy(dev->hv_vhca);
>> mlx5_fw_tracer_destroy(dev->tracer);
>> mlx5_fpga_cleanup(dev);
>> mlx5_eswitch_cleanup(dev->priv.eswitch);
>> @@ -1068,6 +1071,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
>> goto err_fw_tracer;
>> }
>>
>> + mlx5_hv_vhca_init(dev->hv_vhca);
>> +
>> err = mlx5_fpga_device_start(dev);
>> if (err) {
>> mlx5_core_err(dev, "fpga device start failed %d\n", err);
>> @@ -1123,6 +1128,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
>> err_ipsec_start:
>> mlx5_fpga_device_stop(dev);
>> err_fpga_start:
>> + mlx5_hv_vhca_cleanup(dev->hv_vhca);
>> mlx5_fw_tracer_cleanup(dev->tracer);
>> err_fw_tracer:
>> mlx5_eq_table_destroy(dev);
>> @@ -1143,6 +1149,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
>> mlx5_accel_ipsec_cleanup(dev);
>> mlx5_accel_tls_cleanup(dev);
>> mlx5_fpga_device_stop(dev);
>> + mlx5_hv_vhca_cleanup(dev->hv_vhca);
>> mlx5_fw_tracer_cleanup(dev->tracer);
>> mlx5_eq_table_destroy(dev);
>> mlx5_irq_table_destroy(dev);
>> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
>> index 2b84ee9..97bb98c 100644
>> --- a/include/linux/mlx5/driver.h
>> +++ b/include/linux/mlx5/driver.h
>> @@ -646,6 +646,7 @@ struct mlx5_clock {
>> struct mlx5_fw_tracer;
>> struct mlx5_vxlan;
>> struct mlx5_geneve;
>> +struct mlx5_hv_vhca;
>>
>> struct mlx5_core_dev {
>> struct device *device;
>> @@ -693,6 +694,7 @@ struct mlx5_core_dev {
>> struct mlx5_ib_clock_info *clock_info;
>> struct mlx5_fw_tracer *tracer;
>> u32 vsc_addr;
>> + struct mlx5_hv_vhca *hv_vhca;
>> };
>>
>> struct mlx5_db {
>>

2019-08-15 12:15:01

by Eran Ben Elisha

[permalink] [raw]
Subject: Re: [PATCH net-next, 3/6] net/mlx5: Add wrappers for HyperV PCIe operations



On 8/14/2019 11:41 PM, Mark Bloch wrote:
>
>
> On 8/14/19 12:08 PM, Haiyang Zhang wrote:
>> From: Eran Ben Elisha <[email protected]>
>>
>> Add wrapper functions for HyperV PCIe read / write /
>> block_invalidate_register operations. This will be used as an
>> infrastructure in the downstream patch for software communication.
>>
>> This will be enabled by default if CONFIG_PCI_HYPERV_MINI is set.
>>
>> Signed-off-by: Eran Ben Elisha <[email protected]>
>> Signed-off-by: Saeed Mahameed <[email protected]>
>> ---
>> drivers/net/ethernet/mellanox/mlx5/core/Makefile | 1 +
>> drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 ++++++++++++++++++++++++
>> drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++++++++
>> 3 files changed, 87 insertions(+)
>> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
>> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> index 8b7edaa..a8950b1 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> @@ -45,6 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo
>> mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
>> mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
>> mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
>> +mlx5_core-$(CONFIG_PCI_HYPERV_MINI) += lib/hv.o
>>
>> #
>> # Ipoib netdev
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
>> new file mode 100644
>> index 0000000..cf08d02
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
>> @@ -0,0 +1,64 @@
>> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>> +// Copyright (c) 2018 Mellanox Technologies
>> +
>> +#include <linux/hyperv.h>
>> +#include "mlx5_core.h"
>> +#include "lib/hv.h"
>> +
>> +static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
>> + int offset, bool read)
>> +{
>> + int rc = -EOPNOTSUPP;
>> + int bytes_returned;
>> + int block_id;
>> +
>> + if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len % HV_CONFIG_BLOCK_SIZE_MAX)
>> + return -EINVAL;
>> +
>> + block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
>> +
>> + rc = read ?
>> + hyperv_read_cfg_blk(dev->pdev, buf,
>> + HV_CONFIG_BLOCK_SIZE_MAX, block_id,
>> + &bytes_returned) :
>> + hyperv_write_cfg_blk(dev->pdev, buf,
>> + HV_CONFIG_BLOCK_SIZE_MAX, block_id);
>> +
>> + /* Make sure len bytes were read successfully */
>> + if (read)
>> + rc |= !(len == bytes_returned);
>> +
>> + if (rc) {
>> + mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
>> + read ? "read" : "write", rc, len,
>> + offset);
>> + return rc;
>> + }
>> +
>> + return 0;
>> +}
>
> This seems out of place why not expose this function as part of hyperv and mlx5
> will just pass the pdev.
>
The HV driver works with block chunks. I found it less convenience to do
so directly, so I add a small wrapper for mlx5 core.

Haiyangz,
Do you see a reason to export this callback style from the HYPERV level
instead?

>> +
>> +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
>> + int offset)
>> +{
>> + return mlx5_hv_config_common(dev, buf, len, offset, true);
>> +}
>> +
>> +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
>> + int offset)
>> +{
>> + return mlx5_hv_config_common(dev, buf, len, offset, false);
>> +}
>> +
>> +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
>> + void (*block_invalidate)(void *context,
>> + u64 block_mask))
>> +{
>> + return hyperv_reg_block_invalidate(dev->pdev, context,
>> + block_invalidate);
>> +}
>> +
>> +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
>> +{
>> + hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
>> +}
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
>> new file mode 100644
>> index 0000000..7f69771
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
>> @@ -0,0 +1,22 @@
>> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
>> +/* Copyright (c) 2019 Mellanox Technologies. */
>> +
>> +#ifndef __LIB_HV_H__
>> +#define __LIB_HV_H__
>> +
>> +#if IS_ENABLED(CONFIG_PCI_HYPERV_MINI)
>> +
>> +#include <linux/hyperv.h>
>> +#include <linux/mlx5/driver.h>
>> +
>> +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
>> + int offset);
>> +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
>> + int offset);
>> +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
>> + void (*block_invalidate)(void *context,
>> + u64 block_mask));
>> +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
>> +#endif
>> +
>> +#endif /* __LIB_HV_H__ */
>>
>
> Mark
>

2019-08-16 12:30:05

by Vitaly Kuznetsov

[permalink] [raw]
Subject: Re: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for software backchannel interface

Haiyang Zhang <[email protected]> writes:

> This mini driver is a helper driver allows other drivers to
> have a common interface with the Hyper-V PCI frontend driver.
>
> Signed-off-by: Haiyang Zhang <[email protected]>
> Signed-off-by: Saeed Mahameed <[email protected]>
> ---
> MAINTAINERS | 1 +
> drivers/pci/Kconfig | 1 +
> drivers/pci/controller/Kconfig | 7 ++++
> drivers/pci/controller/Makefile | 1 +
> drivers/pci/controller/pci-hyperv-mini.c | 70 ++++++++++++++++++++++++++++++++
> drivers/pci/controller/pci-hyperv.c | 12 ++++--
> include/linux/hyperv.h | 30 ++++++++++----
> 7 files changed, 111 insertions(+), 11 deletions(-)
> create mode 100644 drivers/pci/controller/pci-hyperv-mini.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index e352550..c4962b9 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -7453,6 +7453,7 @@ F: drivers/hid/hid-hyperv.c
> F: drivers/hv/
> F: drivers/input/serio/hyperv-keyboard.c
> F: drivers/pci/controller/pci-hyperv.c
> +F: drivers/pci/controller/pci-hyperv-mini.c
> F: drivers/net/hyperv/
> F: drivers/scsi/storvsc_drv.c
> F: drivers/uio/uio_hv_generic.c
> diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
> index 2ab9240..bb852f5 100644
> --- a/drivers/pci/Kconfig
> +++ b/drivers/pci/Kconfig
> @@ -182,6 +182,7 @@ config PCI_LABEL
> config PCI_HYPERV
> tristate "Hyper-V PCI Frontend"
> depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
> + select PCI_HYPERV_MINI
> help
> The PCI device frontend driver allows the kernel to import arbitrary
> PCI devices from a PCI backend to support PCI driver domains.
> diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
> index fe9f9f1..8e31cba 100644
> --- a/drivers/pci/controller/Kconfig
> +++ b/drivers/pci/controller/Kconfig
> @@ -281,5 +281,12 @@ config VMD
> To compile this driver as a module, choose M here: the
> module will be called vmd.
>
> +config PCI_HYPERV_MINI
> + tristate "Hyper-V PCI Mini"
> + depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
> + help
> + The Hyper-V PCI Mini is a helper driver allows other drivers to
> + have a common interface with the Hyper-V PCI frontend driver.
> +

Out of pure curiosity, why not just export this interface from
PCI_HYPERV directly? Why do we need this stub?

> source "drivers/pci/controller/dwc/Kconfig"
> endmenu
> diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
> index d56a507..77e0132 100644
> --- a/drivers/pci/controller/Makefile
> +++ b/drivers/pci/controller/Makefile
> @@ -4,6 +4,7 @@ obj-$(CONFIG_PCIE_CADENCE_HOST) += pcie-cadence-host.o
> obj-$(CONFIG_PCIE_CADENCE_EP) += pcie-cadence-ep.o
> obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
> obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o
> +obj-$(CONFIG_PCI_HYPERV_MINI) += pci-hyperv-mini.o
> obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
> obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o
> obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o
> diff --git a/drivers/pci/controller/pci-hyperv-mini.c b/drivers/pci/controller/pci-hyperv-mini.c
> new file mode 100644
> index 0000000..9b6cd1c
> --- /dev/null
> +++ b/drivers/pci/controller/pci-hyperv-mini.c
> @@ -0,0 +1,70 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) Microsoft Corporation.
> + *
> + * Author:
> + * Haiyang Zhang <[email protected]>
> + *
> + * This mini driver is a helper driver allows other drivers to
> + * have a common interface with the Hyper-V PCI frontend driver.
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/hyperv.h>
> +
> +struct hyperv_pci_block_ops hvpci_block_ops;
> +EXPORT_SYMBOL(hvpci_block_ops);
> +
> +int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
> + unsigned int block_id, unsigned int *bytes_returned)
> +{
> + if (!hvpci_block_ops.read_block)
> + return -EOPNOTSUPP;
> +
> + return hvpci_block_ops.read_block(dev, buf, buf_len, block_id,
> + bytes_returned);
> +}
> +EXPORT_SYMBOL(hyperv_read_cfg_blk);
> +
> +int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
> + unsigned int block_id)
> +{
> + if (!hvpci_block_ops.write_block)
> + return -EOPNOTSUPP;
> +
> + return hvpci_block_ops.write_block(dev, buf, len, block_id);
> +}
> +EXPORT_SYMBOL(hyperv_write_cfg_blk);
> +
> +int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
> + void (*block_invalidate)(void *context,
> + u64 block_mask))
> +{
> + if (!hvpci_block_ops.reg_blk_invalidate)
> + return -EOPNOTSUPP;
> +
> + return hvpci_block_ops.reg_blk_invalidate(dev, context,
> + block_invalidate);
> +}
> +EXPORT_SYMBOL(hyperv_reg_block_invalidate);
> +
> +static void __exit exit_hv_pci_mini(void)
> +{
> + pr_info("unloaded\n");
> +}
> +
> +static int __init init_hv_pci_mini(void)
> +{
> + pr_info("loaded\n");
> +
> + return 0;
> +}
> +
> +module_init(init_hv_pci_mini);
> +module_exit(exit_hv_pci_mini);
> +
> +MODULE_DESCRIPTION("Hyper-V PCI Mini");
> +MODULE_LICENSE("GPL v2");
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index 57adeca..9c93ac2 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -983,7 +983,6 @@ int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
> *bytes_returned = comp_pkt.bytes_returned;
> return 0;
> }
> -EXPORT_SYMBOL(hv_read_config_block);
>
> /**
> * hv_pci_write_config_compl() - Invoked when a response packet for a write
> @@ -1070,7 +1069,6 @@ int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
>
> return 0;
> }
> -EXPORT_SYMBOL(hv_write_config_block);
>
> /**
> * hv_register_block_invalidate() - Invoked when a config block invalidation
> @@ -1101,7 +1099,6 @@ int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
> return 0;
>
> }
> -EXPORT_SYMBOL(hv_register_block_invalidate);
>
> /* Interrupt management hooks */
> static void hv_int_desc_free(struct hv_pci_dev *hpdev,
> @@ -3045,10 +3042,19 @@ static int hv_pci_remove(struct hv_device *hdev)
> static void __exit exit_hv_pci_drv(void)
> {
> vmbus_driver_unregister(&hv_pci_drv);
> +
> + hvpci_block_ops.read_block = NULL;
> + hvpci_block_ops.write_block = NULL;
> + hvpci_block_ops.reg_blk_invalidate = NULL;
> }
>
> static int __init init_hv_pci_drv(void)
> {
> + /* Initialize PCI block r/w interface */
> + hvpci_block_ops.read_block = hv_read_config_block;
> + hvpci_block_ops.write_block = hv_write_config_block;
> + hvpci_block_ops.reg_blk_invalidate = hv_register_block_invalidate;
> +
> return vmbus_driver_register(&hv_pci_drv);
> }
>
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index 9d37f8c..2afe6fd 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -1579,18 +1579,32 @@ struct vmpacket_descriptor *
> pkt = hv_pkt_iter_next(channel, pkt))
>
> /*
> - * Functions for passing data between SR-IOV PF and VF drivers. The VF driver
> + * Interface for passing data between SR-IOV PF and VF drivers. The VF driver
> * sends requests to read and write blocks. Each block must be 128 bytes or
> * smaller. Optionally, the VF driver can register a callback function which
> * will be invoked when the host says that one or more of the first 64 block
> * IDs is "invalid" which means that the VF driver should reread them.
> */
> #define HV_CONFIG_BLOCK_SIZE_MAX 128
> -int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
> - unsigned int block_id, unsigned int *bytes_returned);
> -int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
> - unsigned int block_id);
> -int hv_register_block_invalidate(struct pci_dev *dev, void *context,
> - void (*block_invalidate)(void *context,
> - u64 block_mask));
> +
> +int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
> + unsigned int block_id, unsigned int *bytes_returned);
> +int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
> + unsigned int block_id);
> +int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
> + void (*block_invalidate)(void *context,
> + u64 block_mask));
> +
> +struct hyperv_pci_block_ops {
> + int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len,
> + unsigned int block_id, unsigned int *bytes_returned);
> + int (*write_block)(struct pci_dev *dev, void *buf, unsigned int len,
> + unsigned int block_id);
> + int (*reg_blk_invalidate)(struct pci_dev *dev, void *context,
> + void (*block_invalidate)(void *context,
> + u64 block_mask));
> +};
> +
> +extern struct hyperv_pci_block_ops hvpci_block_ops;
> +
> #endif /* _HYPERV_H */

--
Vitaly

2019-08-16 14:49:48

by Haiyang Zhang

[permalink] [raw]
Subject: RE: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for software backchannel interface



> -----Original Message-----
> From: Vitaly Kuznetsov <[email protected]>
> Sent: Friday, August 16, 2019 8:28 AM
> To: Haiyang Zhang <[email protected]>; [email protected];
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]
> Cc: Haiyang Zhang <[email protected]>; KY Srinivasan
> <[email protected]>; Stephen Hemminger <[email protected]>;
> [email protected]
> Subject: Re: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for
> software backchannel interface
>
> Haiyang Zhang <[email protected]> writes:
>
> > This mini driver is a helper driver allows other drivers to have a
> > common interface with the Hyper-V PCI frontend driver.
> >
> > Signed-off-by: Haiyang Zhang <[email protected]>
> > Signed-off-by: Saeed Mahameed <[email protected]>
> > ---
> > MAINTAINERS | 1 +
> > drivers/pci/Kconfig | 1 +
> > drivers/pci/controller/Kconfig | 7 ++++
> > drivers/pci/controller/Makefile | 1 +
> > drivers/pci/controller/pci-hyperv-mini.c | 70
> ++++++++++++++++++++++++++++++++
> > drivers/pci/controller/pci-hyperv.c | 12 ++++--
> > include/linux/hyperv.h | 30 ++++++++++----
> > 7 files changed, 111 insertions(+), 11 deletions(-) create mode
> > 100644 drivers/pci/controller/pci-hyperv-mini.c
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS index e352550..c4962b9 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -7453,6 +7453,7 @@ F: drivers/hid/hid-hyperv.c
> > F: drivers/hv/
> > F: drivers/input/serio/hyperv-keyboard.c
> > F: drivers/pci/controller/pci-hyperv.c
> > +F: drivers/pci/controller/pci-hyperv-mini.c
> > F: drivers/net/hyperv/
> > F: drivers/scsi/storvsc_drv.c
> > F: drivers/uio/uio_hv_generic.c
> > diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index
> > 2ab9240..bb852f5 100644
> > --- a/drivers/pci/Kconfig
> > +++ b/drivers/pci/Kconfig
> > @@ -182,6 +182,7 @@ config PCI_LABEL
> > config PCI_HYPERV
> > tristate "Hyper-V PCI Frontend"
> > depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN
> &&
> > X86_64
> > + select PCI_HYPERV_MINI
> > help
> > The PCI device frontend driver allows the kernel to import arbitrary
> > PCI devices from a PCI backend to support PCI driver domains.
> > diff --git a/drivers/pci/controller/Kconfig
> > b/drivers/pci/controller/Kconfig index fe9f9f1..8e31cba 100644
> > --- a/drivers/pci/controller/Kconfig
> > +++ b/drivers/pci/controller/Kconfig
> > @@ -281,5 +281,12 @@ config VMD
> > To compile this driver as a module, choose M here: the
> > module will be called vmd.
> >
> > +config PCI_HYPERV_MINI
> > + tristate "Hyper-V PCI Mini"
> > + depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN
> && X86_64
> > + help
> > + The Hyper-V PCI Mini is a helper driver allows other drivers to
> > + have a common interface with the Hyper-V PCI frontend driver.
> > +
>
> Out of pure curiosity, why not just export this interface from PCI_HYPERV
> directly? Why do we need this stub?

The pci_hyperv can only be loaded on VMs on Hyper-V and Azure. Other
drivers like MLX5e will have symbolic dependency of pci_hyperv if they
use functions exported by pci_hyperv. This dependency will cause other
drivers fail to load on other platforms, like VMs on KVM. So we created
this mini driver, which can be loaded on any platforms to provide the
symbolic dependency.

Thanks,
- Haiyang

2019-08-16 16:17:21

by Vitaly Kuznetsov

[permalink] [raw]
Subject: RE: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for software backchannel interface

Haiyang Zhang <[email protected]> writes:

>
> The pci_hyperv can only be loaded on VMs on Hyper-V and Azure. Other
> drivers like MLX5e will have symbolic dependency of pci_hyperv if they
> use functions exported by pci_hyperv. This dependency will cause other
> drivers fail to load on other platforms, like VMs on KVM. So we created
> this mini driver, which can be loaded on any platforms to provide the
> symbolic dependency.

(/me wondering is there a nicer way around this, by using __weak or
something like that...)

In case this stub is the best solution I'd suggest to rename it to
something like PCI_HYPERV_INTERFACE to make it clear it is not a
separate driver (_MINI makes me think so).

--
Vitaly

2019-08-16 19:51:10

by Haiyang Zhang

[permalink] [raw]
Subject: RE: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for software backchannel interface



> -----Original Message-----
> From: Vitaly Kuznetsov <[email protected]>
> Sent: Friday, August 16, 2019 12:16 PM
> To: Haiyang Zhang <[email protected]>
> Cc: KY Srinivasan <[email protected]>; Stephen Hemminger
> <[email protected]>; [email protected];
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; linux-
> [email protected]; [email protected]
> Subject: RE: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for
> software backchannel interface
>
> Haiyang Zhang <[email protected]> writes:
>
> >
> > The pci_hyperv can only be loaded on VMs on Hyper-V and Azure. Other
> > drivers like MLX5e will have symbolic dependency of pci_hyperv if they
> > use functions exported by pci_hyperv. This dependency will cause other
> > drivers fail to load on other platforms, like VMs on KVM. So we
> > created this mini driver, which can be loaded on any platforms to
> > provide the symbolic dependency.
>
> (/me wondering is there a nicer way around this, by using __weak or
> something like that...)
>
> In case this stub is the best solution I'd suggest to rename it to something like
> PCI_HYPERV_INTERFACE to make it clear it is not a separate driver (_MINI
> makes me think so).

Thanks! I will consider those options.

2019-08-19 15:05:42

by Haiyang Zhang

[permalink] [raw]
Subject: RE: [PATCH net-next, 3/6] net/mlx5: Add wrappers for HyperV PCIe operations



> -----Original Message-----
> From: Eran Ben Elisha <[email protected]>
> Sent: Thursday, August 15, 2019 7:35 AM
> To: Mark Bloch <[email protected]>; Haiyang Zhang
> <[email protected]>; [email protected]; [email protected];
> Saeed Mahameed <[email protected]>; [email protected];
> [email protected]; [email protected]; linux-
> [email protected]; [email protected]; [email protected]
> Cc: KY Srinivasan <[email protected]>; Stephen Hemminger
> <[email protected]>; [email protected]
> Subject: Re: [PATCH net-next, 3/6] net/mlx5: Add wrappers for HyperV PCIe
> operations
>
>
>
> On 8/14/2019 11:41 PM, Mark Bloch wrote:
> >
> >
> > On 8/14/19 12:08 PM, Haiyang Zhang wrote:
> >> From: Eran Ben Elisha <[email protected]>
> >>
> >> Add wrapper functions for HyperV PCIe read / write /
> >> block_invalidate_register operations. This will be used as an
> >> infrastructure in the downstream patch for software communication.
> >>
> >> This will be enabled by default if CONFIG_PCI_HYPERV_MINI is set.
> >>
> >> Signed-off-by: Eran Ben Elisha <[email protected]>
> >> Signed-off-by: Saeed Mahameed <[email protected]>
> >> ---
> >> drivers/net/ethernet/mellanox/mlx5/core/Makefile | 1 +
> >> drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64
> ++++++++++++++++++++++++
> >> drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++++++++
> >> 3 files changed, 87 insertions(+)
> >> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
> >> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
> >>
> >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> >> index 8b7edaa..a8950b1 100644
> >> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> >> @@ -45,6 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) +=
> eswitch.o eswitch_offloads.o eswitch_offlo
> >> mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
> >> mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
> >> mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
> >> +mlx5_core-$(CONFIG_PCI_HYPERV_MINI) += lib/hv.o
> >>
> >> #
> >> # Ipoib netdev
> >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
> b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
> >> new file mode 100644
> >> index 0000000..cf08d02
> >> --- /dev/null
> >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
> >> @@ -0,0 +1,64 @@
> >> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> >> +// Copyright (c) 2018 Mellanox Technologies
> >> +
> >> +#include <linux/hyperv.h>
> >> +#include "mlx5_core.h"
> >> +#include "lib/hv.h"
> >> +
> >> +static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void
> *buf, int len,
> >> + int offset, bool read)
> >> +{
> >> + int rc = -EOPNOTSUPP;
> >> + int bytes_returned;
> >> + int block_id;
> >> +
> >> + if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len %
> HV_CONFIG_BLOCK_SIZE_MAX)
> >> + return -EINVAL;
> >> +
> >> + block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
> >> +
> >> + rc = read ?
> >> + hyperv_read_cfg_blk(dev->pdev, buf,
> >> + HV_CONFIG_BLOCK_SIZE_MAX, block_id,
> >> + &bytes_returned) :
> >> + hyperv_write_cfg_blk(dev->pdev, buf,
> >> + HV_CONFIG_BLOCK_SIZE_MAX, block_id);
> >> +
> >> + /* Make sure len bytes were read successfully */
> >> + if (read)
> >> + rc |= !(len == bytes_returned);
> >> +
> >> + if (rc) {
> >> + mlx5_core_err(dev, "Failed to %s hv config, err = %d, len
> = %d, offset = %d\n",
> >> + read ? "read" : "write", rc, len,
> >> + offset);
> >> + return rc;
> >> + }
> >> +
> >> + return 0;
> >> +}
> >
> > This seems out of place why not expose this function as part of hyperv and
> mlx5
> > will just pass the pdev.
> >
> The HV driver works with block chunks. I found it less convenience to do
> so directly, so I add a small wrapper for mlx5 core.
>
> Haiyangz,
> Do you see a reason to export this callback style from the HYPERV level
> instead?
I don’t think the wrapper has to be in the hv interface.
One function for read, another for write, are pretty straight forward.
Users (other drivers) may use or wrap them in the way they like to.

Thanks,
- Haiyang