2024-04-02 03:29:39

by liulongfang

[permalink] [raw]
Subject: [PATCH v4 0/4] add debugfs to hisilicon migration driver

Add a debugfs function to the hisilicon migration driver in VFIO to
provide intermediate state values and data during device migration.

When the execution of live migration fails, the user can view the
status and data during the migration process separately from the
source and the destination, which is convenient for users to analyze
and locate problems.

Changes v3 -> v4
Rebased on kernel 6.9

Changes v2 -> v3
Solve debugfs serialization problem.

Changes v1 -> v2
Fix the race condition on io_base.

Longfang Liu (4):
hisi_acc_vfio_pci: extract public functions for container_of
hisi_acc_vfio_pci: Create subfunction for data reading
hisi_acc_vfio_pci: register debugfs for hisilicon migration driver
Documentation: add debugfs description for hisi migration

.../ABI/testing/debugfs-hisi-migration | 34 +++
MAINTAINERS | 1 +
.../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 281 ++++++++++++++++--
.../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 14 +
4 files changed, 298 insertions(+), 32 deletions(-)
create mode 100644 Documentation/ABI/testing/debugfs-hisi-migration

--
2.24.0



2024-04-02 03:30:33

by liulongfang

[permalink] [raw]
Subject: [PATCH v4 1/4] hisi_acc_vfio_pci: extract public functions for container_of

In the current driver, the struct hisi_acc_vf_core_device is obtained
from the struct vfio_device (vdev) through container_of().
This pattern is repeated in many places in the driver. In order to
reduce this repetition, it is extracted into a common helper
function.

Signed-off-by: Longfang Liu <[email protected]>
---
.../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 21 ++++++++++---------
1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 9a3e97108ace..45351be8e270 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -630,6 +630,12 @@ static void hisi_acc_vf_disable_fds(struct hisi_acc_vf_core_device *hisi_acc_vde
}
}

+static struct hisi_acc_vf_core_device *hisi_acc_get_vf_dev(struct vfio_device *vdev)
+{
+ return container_of(vdev, struct hisi_acc_vf_core_device,
+ core_device.vdev);
+}
+
static void hisi_acc_vf_reset(struct hisi_acc_vf_core_device *hisi_acc_vdev)
{
hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
@@ -1033,8 +1039,7 @@ static struct file *
hisi_acc_vfio_pci_set_device_state(struct vfio_device *vdev,
enum vfio_device_mig_state new_state)
{
- struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(vdev,
- struct hisi_acc_vf_core_device, core_device.vdev);
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
enum vfio_device_mig_state next_state;
struct file *res = NULL;
int ret;
@@ -1075,8 +1080,7 @@ static int
hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev,
enum vfio_device_mig_state *curr_state)
{
- struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(vdev,
- struct hisi_acc_vf_core_device, core_device.vdev);
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);

mutex_lock(&hisi_acc_vdev->state_mutex);
*curr_state = hisi_acc_vdev->mig_state;
@@ -1280,8 +1284,7 @@ static long hisi_acc_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int

static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
{
- struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(core_vdev,
- struct hisi_acc_vf_core_device, core_device.vdev);
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev);
struct vfio_pci_core_device *vdev = &hisi_acc_vdev->core_device;
int ret;

@@ -1304,8 +1307,7 @@ static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)

static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev)
{
- struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(core_vdev,
- struct hisi_acc_vf_core_device, core_device.vdev);
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev);
struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;

iounmap(vf_qm->io_base);
@@ -1320,8 +1322,7 @@ static const struct vfio_migration_ops hisi_acc_vfio_pci_migrn_state_ops = {

static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev)
{
- struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(core_vdev,
- struct hisi_acc_vf_core_device, core_device.vdev);
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev);
struct pci_dev *pdev = to_pci_dev(core_vdev->dev);
struct hisi_qm *pf_qm = hisi_acc_get_pf_qm(pdev);

--
2.24.0


2024-04-02 03:30:35

by liulongfang

[permalink] [raw]
Subject: [PATCH v4 2/4] hisi_acc_vfio_pci: Create subfunction for data reading

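During the live migration process, the driver needs to obtain various
status data of drivers and devices. In order to make this logic
callable from the debugfs functions, all operations that read data
from device registers are moved into a subfunction.
Also fix the location of the address data: the low and high words of
the eqe_dma/aeqe_dma addresses are now taken from indexes
QM_XQC_ADDR_LOW (1) and QM_XQC_ADDR_HIGH (2) of the qm_eqc_dw and
qm_aeqc_dw arrays instead of indexes 0 and 1.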

Signed-off-by: Longfang Liu <[email protected]>
---
.../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 56 +++++++++++--------
.../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 3 +
2 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 45351be8e270..bf358ba94b5d 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -486,6 +486,39 @@ static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
return 0;
}

+static int vf_qm_read_data(struct hisi_qm *vf_qm, struct acc_vf_data *vf_data)
+{
+ struct device *dev = &vf_qm->pdev->dev;
+ int ret;
+
+ ret = qm_get_regs(vf_qm, vf_data);
+ if (ret)
+ return -EINVAL;
+
+ /* Every reg is 32 bit, the dma address is 64 bit. */
+ vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
+ vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
+ vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
+ vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
+ vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
+ vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
+
+ /* Through SQC_BT/CQC_BT to get sqc and cqc address */
+ ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
+ if (ret) {
+ dev_err(dev, "failed to read SQC addr!\n");
+ return -EINVAL;
+ }
+
+ ret = qm_get_cqc(vf_qm, &vf_data->cqc_dma);
+ if (ret) {
+ dev_err(dev, "failed to read CQC addr!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
struct hisi_acc_vf_migration_file *migf)
{
@@ -511,31 +544,10 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
return ret;
}

- ret = qm_get_regs(vf_qm, vf_data);
+ ret = vf_qm_read_data(vf_qm, vf_data);
if (ret)
return -EINVAL;

- /* Every reg is 32 bit, the dma address is 64 bit. */
- vf_data->eqe_dma = vf_data->qm_eqc_dw[1];
- vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
- vf_data->eqe_dma |= vf_data->qm_eqc_dw[0];
- vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1];
- vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
- vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0];
-
- /* Through SQC_BT/CQC_BT to get sqc and cqc address */
- ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
- if (ret) {
- dev_err(dev, "failed to read SQC addr!\n");
- return -EINVAL;
- }
-
- ret = qm_get_cqc(vf_qm, &vf_data->cqc_dma);
- if (ret) {
- dev_err(dev, "failed to read CQC addr!\n");
- return -EINVAL;
- }
-
migf->total_length = sizeof(struct acc_vf_data);
return 0;
}
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
index 5bab46602fad..7a9dc87627cd 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
@@ -38,6 +38,9 @@
#define QM_REG_ADDR_OFFSET 0x0004

#define QM_XQC_ADDR_OFFSET 32U
+#define QM_XQC_ADDR_LOW 0x1
+#define QM_XQC_ADDR_HIGH 0x2
+
#define QM_VF_AEQ_INT_MASK 0x0004
#define QM_VF_EQ_INT_MASK 0x000c
#define QM_IFC_INT_SOURCE_V 0x0020
--
2.24.0


2024-04-02 03:31:09

by liulongfang

[permalink] [raw]
Subject: [PATCH v4 3/4] hisi_acc_vfio_pci: register debugfs for hisilicon migration driver

On the debugfs framework of VFIO, if the CONFIG_VFIO_DEBUGFS macro is
enabled, the debug function is registered for the live migration driver
of the HiSilicon accelerator device.

After registering the HiSilicon accelerator device on the debugfs
framework of live migration of vfio, a debugfs directory "hisi_acc"
is created, and then four debug function files are created in this
directory:

vfio
|
+---<dev_name1>
| +---migration
| +--state
| +--hisi_acc
| +--attr
| +--data
| +--save
| +--cmd_state
|
+---<dev_name2>
+---migration
+--state
+--hisi_acc
+--attr
+--data
+--save
+--cmd_state

data file: used to get the migration data from the driver
attr file: used to get device attributes parameters from the driver
save file: used to read the data of the live migration device and save
it to the driver.
cmd_state: used to get the cmd channel state for the device.

+----------------+ +--------------+ +---------------+
| migration dev | | src dev | | dst dev |
+-------+--------+ +------+-------+ +-------+-------+
| | |
| | |
| | |
| | |
save | +------v-------+ +-------v-------+
| | saving_migf | | resuming_migf |
| | file | | file |
| +------+-------+ +-------+-------+
| | |
| mutex | |
+-------v--------+ | |
| | | |
| debug_migf file<---------------+-----------------------+
| | copy
+-------+--------+
|
cat |
|
+-------v--------+
| user |
+----------------+

In the debugfs scheme, the driver creates a separate debug_migf file.
It is completely separated from the two files of live migration,
thus preventing debugfs data from interfering with migration data.
Moreover, debugfs only performs read operations on the device.

For serialization of debugfs, debug_migf is written in only two cases:
First, when a debugfs save operation reads the data from the device.
Second, when the data is copied from the saving or resuming migration
file as that file is disabled at the end of a live migration.
These two operations are made mutually exclusive through a mutex.

Signed-off-by: Longfang Liu <[email protected]>
---
.../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 204 ++++++++++++++++++
.../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 11 +
2 files changed, 215 insertions(+)

diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index bf358ba94b5d..9f563a31a2a1 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -15,6 +15,7 @@
#include <linux/anon_inodes.h>

#include "hisi_acc_vfio_pci.h"
+#include "../../vfio.h"

/* Return 0 on VM acc device ready, -ETIMEDOUT hardware timeout */
static int qm_wait_dev_not_ready(struct hisi_qm *qm)
@@ -618,6 +619,22 @@ hisi_acc_check_int_state(struct hisi_acc_vf_core_device *hisi_acc_vdev)
}
}

+static void hisi_acc_vf_migf_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+ struct hisi_acc_vf_migration_file *src_migf)
+{
+ struct hisi_acc_vf_migration_file *dst_migf = hisi_acc_vdev->debug_migf;
+
+ if (!dst_migf)
+ return;
+
+ mutex_lock(&hisi_acc_vdev->enable_mutex);
+ dst_migf->disabled = src_migf->disabled;
+ dst_migf->total_length = src_migf->total_length;
+ memcpy(&dst_migf->vf_data, &src_migf->vf_data,
+ sizeof(struct acc_vf_data));
+ mutex_unlock(&hisi_acc_vdev->enable_mutex);
+}
+
static void hisi_acc_vf_disable_fd(struct hisi_acc_vf_migration_file *migf)
{
mutex_lock(&migf->lock);
@@ -630,12 +647,14 @@ static void hisi_acc_vf_disable_fd(struct hisi_acc_vf_migration_file *migf)
static void hisi_acc_vf_disable_fds(struct hisi_acc_vf_core_device *hisi_acc_vdev)
{
if (hisi_acc_vdev->resuming_migf) {
+ hisi_acc_vf_migf_save(hisi_acc_vdev, hisi_acc_vdev->resuming_migf);
hisi_acc_vf_disable_fd(hisi_acc_vdev->resuming_migf);
fput(hisi_acc_vdev->resuming_migf->filp);
hisi_acc_vdev->resuming_migf = NULL;
}

if (hisi_acc_vdev->saving_migf) {
+ hisi_acc_vf_migf_save(hisi_acc_vdev, hisi_acc_vdev->saving_migf);
hisi_acc_vf_disable_fd(hisi_acc_vdev->saving_migf);
fput(hisi_acc_vdev->saving_migf->filp);
hisi_acc_vdev->saving_migf = NULL;
@@ -1144,6 +1163,7 @@ static int hisi_acc_vf_qm_init(struct hisi_acc_vf_core_device *hisi_acc_vdev)
if (!vf_qm->io_base)
return -EIO;

+ mutex_init(&hisi_acc_vdev->enable_mutex);
vf_qm->fun_type = QM_HW_VF;
vf_qm->pdev = vf_dev;
mutex_init(&vf_qm->mailbox_lock);
@@ -1294,6 +1314,181 @@ static long hisi_acc_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int
return vfio_pci_core_ioctl(core_vdev, cmd, arg);
}

+static int hisi_acc_vf_debug_check(struct seq_file *seq, struct vfio_device *vdev)
+{
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
+ struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->debug_migf;
+ struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+ int ret;
+
+ if (!vdev->mig_ops || !migf) {
+ seq_printf(seq, "%s\n", "device does not support live migration!");
+ return -EINVAL;
+ }
+
+ /**
+ * When the device is not opened, the io_base is not mapped.
+ * The driver cannot perform device read and write operations.
+ */
+ if (hisi_acc_vdev->dev_opened != DEV_OPEN) {
+ seq_printf(seq, "%s\n", "device not opened!");
+ return -EINVAL;
+ }
+
+ ret = qm_wait_dev_not_ready(vf_qm);
+ if (ret) {
+ seq_printf(seq, "%s\n", "VF device not ready!");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hisi_acc_vf_debug_cmd(struct seq_file *seq, void *data)
+{
+ struct device *vf_dev = seq->private;
+ struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
+ struct vfio_device *vdev = &core_device->vdev;
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
+ struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+ u64 value;
+ int ret;
+
+ mutex_lock(&hisi_acc_vdev->enable_mutex);
+ ret = hisi_acc_vf_debug_check(seq, vdev);
+ if (ret) {
+ mutex_unlock(&hisi_acc_vdev->enable_mutex);
+ return 0;
+ }
+
+ value = readl(vf_qm->io_base + QM_MB_CMD_SEND_BASE);
+ mutex_unlock(&hisi_acc_vdev->enable_mutex);
+ seq_printf(seq, "%s:0x%llx\n", "mailbox cmd channel state is OK", value);
+
+ return 0;
+}
+
+static int hisi_acc_vf_debug_save(struct seq_file *seq, void *data)
+{
+ struct device *vf_dev = seq->private;
+ struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
+ struct vfio_device *vdev = &core_device->vdev;
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
+ struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->debug_migf;
+ struct acc_vf_data *vf_data = &migf->vf_data;
+ int ret;
+
+ mutex_lock(&hisi_acc_vdev->enable_mutex);
+ ret = hisi_acc_vf_debug_check(seq, vdev);
+ if (ret) {
+ mutex_unlock(&hisi_acc_vdev->enable_mutex);
+ return 0;
+ }
+
+ vf_data->vf_qm_state = QM_READY;
+ ret = vf_qm_read_data(&hisi_acc_vdev->vf_qm, vf_data);
+ if (ret) {
+ mutex_unlock(&hisi_acc_vdev->enable_mutex);
+ seq_printf(seq, "%s\n", "failed to save device data!");
+ return 0;
+ }
+
+ migf->total_length = sizeof(struct acc_vf_data);
+ mutex_unlock(&hisi_acc_vdev->enable_mutex);
+ seq_printf(seq, "%s\n", "successful to save device data!");
+
+ return 0;
+}
+
+static int hisi_acc_vf_data_read(struct seq_file *seq, void *data)
+{
+ struct device *vf_dev = seq->private;
+ struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
+ struct vfio_device *vdev = &core_device->vdev;
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
+ struct hisi_acc_vf_migration_file *debug_migf = hisi_acc_vdev->debug_migf;
+ size_t vf_data_sz = offsetofend(struct acc_vf_data, padding);
+
+ if (debug_migf && debug_migf->total_length)
+ seq_hex_dump(seq, "Mig Data:", DUMP_PREFIX_OFFSET, 16, 1,
+ (unsigned char *)&debug_migf->vf_data,
+ vf_data_sz, false);
+ else
+ seq_printf(seq, "%s\n", "device not migrated!");
+
+ return 0;
+}
+
+static int hisi_acc_vf_attr_read(struct seq_file *seq, void *data)
+{
+ struct device *vf_dev = seq->private;
+ struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
+ struct vfio_device *vdev = &core_device->vdev;
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
+ struct hisi_acc_vf_migration_file *debug_migf = hisi_acc_vdev->debug_migf;
+
+ if (debug_migf && debug_migf->total_length) {
+ seq_printf(seq,
+ "acc device:\n"
+ "device state: %d\n"
+ "device ready: %u\n"
+ "data valid: %d\n"
+ "data size: %lu\n",
+ hisi_acc_vdev->mig_state,
+ hisi_acc_vdev->vf_qm_state,
+ debug_migf->disabled,
+ debug_migf->total_length);
+ } else {
+ seq_printf(seq, "%s\n", "device not migrated!");
+ }
+
+ return 0;
+}
+
+static int hisi_acc_vfio_debug_init(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+ struct vfio_device *vdev = &hisi_acc_vdev->core_device.vdev;
+ struct dentry *vfio_dev_migration = NULL;
+ struct dentry *vfio_hisi_acc = NULL;
+ struct device *dev = vdev->dev;
+ void *migf = NULL;
+
+ if (!debugfs_initialized())
+ return 0;
+
+ migf = kzalloc(sizeof(struct hisi_acc_vf_migration_file), GFP_KERNEL);
+ if (!migf)
+ return -ENOMEM;
+ hisi_acc_vdev->debug_migf = migf;
+
+ vfio_dev_migration = debugfs_lookup("migration", vdev->debug_root);
+ if (!vfio_dev_migration) {
+ kfree(migf);
+ dev_err(dev, "failed to lookup migration debugfs file!\n");
+ return -ENODEV;
+ }
+
+ vfio_hisi_acc = debugfs_create_dir("hisi_acc", vfio_dev_migration);
+ debugfs_create_devm_seqfile(dev, "data", vfio_hisi_acc,
+ hisi_acc_vf_data_read);
+ debugfs_create_devm_seqfile(dev, "attr", vfio_hisi_acc,
+ hisi_acc_vf_attr_read);
+ debugfs_create_devm_seqfile(dev, "cmd_state", vfio_hisi_acc,
+ hisi_acc_vf_debug_cmd);
+ debugfs_create_devm_seqfile(dev, "save", vfio_hisi_acc,
+ hisi_acc_vf_debug_save);
+
+ return 0;
+}
+
+static void hisi_acc_vf_debugfs_exit(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+ if (!debugfs_initialized())
+ return;
+
+ kfree(hisi_acc_vdev->debug_migf);
+}
+
static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
{
struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev);
@@ -1311,9 +1506,11 @@ static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
return ret;
}
hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
+ hisi_acc_vdev->dev_opened = DEV_OPEN;
}

vfio_pci_core_finish_enable(vdev);
+
return 0;
}

@@ -1322,7 +1519,10 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev)
struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev);
struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;

+ hisi_acc_vdev->dev_opened = DEV_CLOSE;
+ mutex_lock(&hisi_acc_vdev->enable_mutex);
iounmap(vf_qm->io_base);
+ mutex_unlock(&hisi_acc_vdev->enable_mutex);
vfio_pci_core_close_device(core_vdev);
}

@@ -1413,6 +1613,9 @@ static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device
ret = vfio_pci_core_register_device(&hisi_acc_vdev->core_device);
if (ret)
goto out_put_vdev;
+
+ if (ops == &hisi_acc_vfio_pci_migrn_ops)
+ hisi_acc_vfio_debug_init(hisi_acc_vdev);
return 0;

out_put_vdev:
@@ -1425,6 +1628,7 @@ static void hisi_acc_vfio_pci_remove(struct pci_dev *pdev)
struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);

vfio_pci_core_unregister_device(&hisi_acc_vdev->core_device);
+ hisi_acc_vf_debugfs_exit(hisi_acc_vdev);
vfio_put_device(&hisi_acc_vdev->core_device.vdev);
}

diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
index 7a9dc87627cd..3a20d81d105c 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
@@ -52,6 +52,11 @@
#define QM_EQC_DW0 0X8000
#define QM_AEQC_DW0 0X8020

+enum acc_dev_state {
+ DEV_CLOSE = 0x0,
+ DEV_OPEN,
+};
+
struct acc_vf_data {
#define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state)
/* QM match information */
@@ -114,5 +119,11 @@ struct hisi_acc_vf_core_device {
int vf_id;
struct hisi_acc_vf_migration_file *resuming_migf;
struct hisi_acc_vf_migration_file *saving_migf;
+
+ /* To make sure the device is enabled */
+ struct mutex enable_mutex;
+ bool dev_opened;
+ /* For debugfs */
+ struct hisi_acc_vf_migration_file *debug_migf;
};
#endif /* HISI_ACC_VFIO_PCI_H */
--
2.24.0


2024-04-02 03:31:37

by liulongfang

[permalink] [raw]
Subject: [PATCH v4 4/4] Documentation: add debugfs description for hisi migration

Add a debugfs document description file to help users understand
how to use the hisilicon accelerator live migration driver's
debugfs.

Update the file paths that need to be maintained in MAINTAINERS

Signed-off-by: Longfang Liu <[email protected]>
---
.../ABI/testing/debugfs-hisi-migration | 34 +++++++++++++++++++
MAINTAINERS | 1 +
2 files changed, 35 insertions(+)
create mode 100644 Documentation/ABI/testing/debugfs-hisi-migration

diff --git a/Documentation/ABI/testing/debugfs-hisi-migration b/Documentation/ABI/testing/debugfs-hisi-migration
new file mode 100644
index 000000000000..3d7339276e6f
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-hisi-migration
@@ -0,0 +1,34 @@
+What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/data
+Date: Apr 2024
+KernelVersion: 6.9
+Contact: Longfang Liu <[email protected]>
+Description: Read the live migration data of the vfio device.
+ These data include device status data, queue configuration
+ data and some task configuration data.
+ The output format of the data is defined by the live
+ migration driver.
+
+What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/attr
+Date: Apr 2024
+KernelVersion: 6.9
+Contact: Longfang Liu <[email protected]>
+Description: Read the live migration attributes of the vfio device.
+ It includes device status attributes and data length attributes.
+ The output format of the attributes is defined by the live
+ migration driver.
+
+What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/cmd_state
+Date: Apr 2024
+KernelVersion: 6.9
+Contact: Longfang Liu <[email protected]>
+Description: Used to obtain the device command sending and receiving
+ channel status. If successful, returns the command value.
+ On failure, returns an error log.
+
+What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/save
+Date: Apr 2024
+KernelVersion: 6.9
+Contact: Longfang Liu <[email protected]>
+Description: Trigger the Hisilicon accelerator device to perform
+ the state saving operation of live migration through the read
+ operation, and output the operation log results.
diff --git a/MAINTAINERS b/MAINTAINERS
index 7625911ec2f1..8c2d13b13273 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -23072,6 +23072,7 @@ M: Longfang Liu <[email protected]>
M: Shameer Kolothum <[email protected]>
L: [email protected]
S: Maintained
+F: Documentation/ABI/testing/debugfs-hisi-migration
F: drivers/vfio/pci/hisilicon/

VFIO MEDIATED DEVICE DRIVERS
--
2.24.0


2024-04-04 20:07:51

by Alex Williamson

[permalink] [raw]
Subject: Re: [PATCH v4 2/4] hisi_acc_vfio_pci: Create subfunction for data reading

On Tue, 2 Apr 2024 11:24:30 +0800
Longfang Liu <[email protected]> wrote:

> During the live migration process. It needs to obtain various status
> data of drivers and devices. In order to facilitate calling it in the
> debugfs function. For all operations that read data from device registers,
> the driver creates a subfunction.
> Also fixed the location of address data.

Cédric noted privately and I agree, 1) fixes should be provided in
separate patches with a Fixes: tag rather than subtly included in a
minor refactoring, and 2) what does this imply about the existing
functionality of migration? This would seem to suggest existing
migration data is bogus if we're offset by a register reading the DMA
address. The commit log for the Fixes patch should describe this.

>
> Signed-off-by: Longfang Liu <[email protected]>
> ---
> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 56 +++++++++++--------
> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 3 +
> 2 files changed, 37 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> index 45351be8e270..bf358ba94b5d 100644
> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> @@ -486,6 +486,39 @@ static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
> return 0;
> }
>
> +static int vf_qm_read_data(struct hisi_qm *vf_qm, struct acc_vf_data *vf_data)
> +{
> + struct device *dev = &vf_qm->pdev->dev;
> + int ret;
> +
> + ret = qm_get_regs(vf_qm, vf_data);
> + if (ret)
> + return -EINVAL;
> +
> + /* Every reg is 32 bit, the dma address is 64 bit. */
> + vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
> + vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
> + vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
> + vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
> + vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
> + vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
> +
> + /* Through SQC_BT/CQC_BT to get sqc and cqc address */
> + ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
> + if (ret) {
> + dev_err(dev, "failed to read SQC addr!\n");
> + return -EINVAL;
> + }
> +
> + ret = qm_get_cqc(vf_qm, &vf_data->cqc_dma);
> + if (ret) {
> + dev_err(dev, "failed to read CQC addr!\n");
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
> struct hisi_acc_vf_migration_file *migf)
> {
> @@ -511,31 +544,10 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
> return ret;
> }
>
> - ret = qm_get_regs(vf_qm, vf_data);
> + ret = vf_qm_read_data(vf_qm, vf_data);
> if (ret)
> return -EINVAL;
>
> - /* Every reg is 32 bit, the dma address is 64 bit. */
> - vf_data->eqe_dma = vf_data->qm_eqc_dw[1];
> - vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
> - vf_data->eqe_dma |= vf_data->qm_eqc_dw[0];
> - vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1];
> - vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
> - vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0];
> -
> - /* Through SQC_BT/CQC_BT to get sqc and cqc address */
> - ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
> - if (ret) {
> - dev_err(dev, "failed to read SQC addr!\n");
> - return -EINVAL;
> - }
> -
> - ret = qm_get_cqc(vf_qm, &vf_data->cqc_dma);
> - if (ret) {
> - dev_err(dev, "failed to read CQC addr!\n");
> - return -EINVAL;
> - }
> -
> migf->total_length = sizeof(struct acc_vf_data);
> return 0;
> }
> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> index 5bab46602fad..7a9dc87627cd 100644
> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> @@ -38,6 +38,9 @@
> #define QM_REG_ADDR_OFFSET 0x0004
>
> #define QM_XQC_ADDR_OFFSET 32U
> +#define QM_XQC_ADDR_LOW 0x1
> +#define QM_XQC_ADDR_HIGH 0x2
> +
> #define QM_VF_AEQ_INT_MASK 0x0004
> #define QM_VF_EQ_INT_MASK 0x000c
> #define QM_IFC_INT_SOURCE_V 0x0020


2024-04-04 20:08:43

by Alex Williamson

[permalink] [raw]
Subject: Re: [PATCH v4 4/4] Documentation: add debugfs description for hisi migration

On Tue, 2 Apr 2024 11:24:32 +0800
Longfang Liu <[email protected]> wrote:

> Add a debugfs document description file to help users understand
> how to use the hisilicon accelerator live migration driver's
> debugfs.
>
> Update the file paths that need to be maintained in MAINTAINERS
>
> Signed-off-by: Longfang Liu <[email protected]>
> ---
> .../ABI/testing/debugfs-hisi-migration | 34 +++++++++++++++++++
> MAINTAINERS | 1 +
> 2 files changed, 35 insertions(+)
> create mode 100644 Documentation/ABI/testing/debugfs-hisi-migration
>
> diff --git a/Documentation/ABI/testing/debugfs-hisi-migration b/Documentation/ABI/testing/debugfs-hisi-migration
> new file mode 100644
> index 000000000000..3d7339276e6f
> --- /dev/null
> +++ b/Documentation/ABI/testing/debugfs-hisi-migration
> @@ -0,0 +1,34 @@
> +What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/data
> +Date: Apr 2024
> +KernelVersion: 6.9

At best 6.10 with a merge window in May.

> +Contact: Longfang Liu <[email protected]>
> +Description: Read the live migration data of the vfio device.
> + These data include device status data, queue configuration
> + data and some task configuration data.
> + The output format of the data is defined by the live
> + migration driver.

"Dumps the device debug migration buffer, state must first be saved
using the 'save' attribute."

> +
> +What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/attr
> +Date: Apr 2024
> +KernelVersion: 6.9
> +Contact: Longfang Liu <[email protected]>
> +Description: Read the live migration attributes of the vfio device.
> + it include device status attributes and data length attributes
> + The output format of the attributes is defined by the live
> + migration driver.

AFAICT from the previous patch, this attribute is useless.

> +
> +What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/cmd_state
> +Date: Apr 2024
> +KernelVersion: 6.9
> +Contact: Longfang Liu <[email protected]>
> +Description: Used to obtain the device command sending and receiving
> + channel status. If successful, returns the command value.
> + If failed, return error log.
> +

Seems like it statically returns "OK" plus the actual value.


> +What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/save
> +Date: Apr 2024
> +KernelVersion: 6.9
> +Contact: Longfang Liu <[email protected]>
> +Description: Trigger the Hisilicon accelerator device to perform
> + the state saving operation of live migration through the read
> + operation, and output the operation log results.

These interfaces are confusing, attr and data only work if there has
either been a previous save OR the user migration process closed saving
or resuming fds in the interim, and the user doesn't know which one
they get. Note that debug_migf isn't even discarded between
open/close, only cmd and save require the device to be opened by a
user, data and attr might continue to return data from some previous
user save, resume, or debugfs save.



> diff --git a/MAINTAINERS b/MAINTAINERS
> index 7625911ec2f1..8c2d13b13273 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -23072,6 +23072,7 @@ M: Longfang Liu <[email protected]>
> M: Shameer Kolothum <[email protected]>
> L: [email protected]
> S: Maintained
> +F: Documentation/ABI/testing/debugfs-hisi-migration
> F: drivers/vfio/pci/hisilicon/
>
> VFIO MEDIATED DEVICE DRIVERS


2024-04-04 20:35:40

by Alex Williamson

[permalink] [raw]
Subject: Re: [PATCH v4 3/4] hisi_acc_vfio_pci: register debugfs for hisilicon migration driver

On Tue, 2 Apr 2024 11:24:31 +0800
Longfang Liu <[email protected]> wrote:

> On the debugfs framework of VFIO, if the CONFIG_VFIO_DEBUGFS macro is
> enabled, the debug function is registered for the live migration driver
> of the HiSilicon accelerator device.
>
> After registering the HiSilicon accelerator device on the debugfs
> framework of live migration of vfio, a directory file "hisi_acc"
> of debugfs is created, and then three debug function files are
> created in this directory:
>
> vfio
> |
> +---<dev_name1>
> | +---migration
> | +--state
> | +--hisi_acc
> | +--attr
> | +--data
> | +--save
> | +--cmd_state
> |
> +---<dev_name2>
> +---migration
> +--state
> +--hisi_acc
> +--attr
> +--data
> +--save
> +--cmd_state
>
> data file: used to get the migration data from the driver
> attr file: used to get device attributes parameters from the driver
> save file: used to read the data of the live migration device and save
> it to the driver.
> cmd_state: used to get the cmd channel state for the device.
>
> +----------------+ +--------------+ +---------------+
> | migration dev | | src dev | | dst dev |
> +-------+--------+ +------+-------+ +-------+-------+
> | | |
> | | |
> | | |
> | | |
> save | +------v-------+ +-------v-------+
> | | saving_mif | | resuming_migf |
> | | file | | file |
> | +------+-------+ +-------+-------+
> | | |
> | mutex | |
> +-------v--------+ | |
> | | | |
> | debug_migf file<---------------+-----------------------+
> | | copy
> +-------+--------+
> |
> cat |
> |
> +-------v--------+
> | user |
> +----------------+
>
> In debugfs scheme. The driver creates a separate debug_migf file.
> It is completely separated from the two files of live migration,
> thus preventing debugfs data from interfering with migration data.
> Moreover, it only performs read operations on the device.
>
> For serialization of debugfs:
> First, it only writes data when performing a debugfs save operation.

This distinction between "writing" and "copying" is very confusing.

> Second, it is only copied from the file on the migration device
> when the live migration is complete.

Why does it do this at all? If you're looking for a postmortem of the
user generated buffer, that should be explicitly stated.

> These two operations are mutually exclusive through mutex.

The mutual exclusion between debugfs operations is not the concern, the
question is whether there's serialization that prevents the debugfs
operations from interfering with the user migration flow. Nothing here
seems to prevent concurrent use of the debugfs interface proposed here
with a user migration.

>
> Signed-off-by: Longfang Liu <[email protected]>
> ---
> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 204 ++++++++++++++++++
> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 11 +
> 2 files changed, 215 insertions(+)
>
> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> index bf358ba94b5d..9f563a31a2a1 100644
> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> @@ -15,6 +15,7 @@
> #include <linux/anon_inodes.h>
>
> #include "hisi_acc_vfio_pci.h"
> +#include "../../vfio.h"

This include seems not to be required.

>
> /* Return 0 on VM acc device ready, -ETIMEDOUT hardware timeout */
> static int qm_wait_dev_not_ready(struct hisi_qm *qm)
> @@ -618,6 +619,22 @@ hisi_acc_check_int_state(struct hisi_acc_vf_core_device *hisi_acc_vdev)
> }
> }
>
> +static void hisi_acc_vf_migf_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
> + struct hisi_acc_vf_migration_file *src_migf)

Seems this should be named something relative to debug since its only
purpose is to copy the migration file to the debug migration file.

> +{
> + struct hisi_acc_vf_migration_file *dst_migf = hisi_acc_vdev->debug_migf;
> +
> + if (!dst_migf)
> + return;
> +
> + mutex_lock(&hisi_acc_vdev->enable_mutex);
> + dst_migf->disabled = src_migf->disabled;

In the cases where this is called, the caller is about to call
hisi_acc_vf_disable_fd() which sets disabled = true and then
hisi_acc_vf_debug_save() doesn't touch the value! What does it even
mean to copy this value, let alone print it as part of the debugfs
output later?


> + dst_migf->total_length = src_migf->total_length;
> + memcpy(&dst_migf->vf_data, &src_migf->vf_data,
> + sizeof(struct acc_vf_data));
> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
> +}
> +
> static void hisi_acc_vf_disable_fd(struct hisi_acc_vf_migration_file *migf)
> {
> mutex_lock(&migf->lock);
> @@ -630,12 +647,14 @@ static void hisi_acc_vf_disable_fd(struct hisi_acc_vf_migration_file *migf)
> static void hisi_acc_vf_disable_fds(struct hisi_acc_vf_core_device *hisi_acc_vdev)
> {
> if (hisi_acc_vdev->resuming_migf) {
> + hisi_acc_vf_migf_save(hisi_acc_vdev, hisi_acc_vdev->resuming_migf);
> hisi_acc_vf_disable_fd(hisi_acc_vdev->resuming_migf);
> fput(hisi_acc_vdev->resuming_migf->filp);
> hisi_acc_vdev->resuming_migf = NULL;
> }
>
> if (hisi_acc_vdev->saving_migf) {
> + hisi_acc_vf_migf_save(hisi_acc_vdev, hisi_acc_vdev->saving_migf);

Why are these buffers copied to the debug_migf in this case? This can
happen asynchronously to accessing the debugfs migration file and there's
no serialization.

> hisi_acc_vf_disable_fd(hisi_acc_vdev->saving_migf);
> fput(hisi_acc_vdev->saving_migf->filp);
> hisi_acc_vdev->saving_migf = NULL;
> @@ -1144,6 +1163,7 @@ static int hisi_acc_vf_qm_init(struct hisi_acc_vf_core_device *hisi_acc_vdev)
> if (!vf_qm->io_base)
> return -EIO;
>
> + mutex_init(&hisi_acc_vdev->enable_mutex);
> vf_qm->fun_type = QM_HW_VF;
> vf_qm->pdev = vf_dev;
> mutex_init(&vf_qm->mailbox_lock);
> @@ -1294,6 +1314,181 @@ static long hisi_acc_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int
> return vfio_pci_core_ioctl(core_vdev, cmd, arg);
> }
>
> +static int hisi_acc_vf_debug_check(struct seq_file *seq, struct vfio_device *vdev)
> +{
> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
> + struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->debug_migf;
> + struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
> + int ret;
> +

lockdep_assert_held(...)

> + if (!vdev->mig_ops || !migf) {
> + seq_printf(seq, "%s\n", "device does not support live migration!");
> + return -EINVAL;

Isn't the -EINVAL sufficient?

> + }
> +
> + /**
> + * When the device is not opened, the io_base is not mapped.
> + * The driver cannot perform device read and write operations.
> + */
> + if (hisi_acc_vdev->dev_opened != DEV_OPEN) {

Why are we assigning and testing a bool against an enum?!

> + seq_printf(seq, "%s\n", "device not opened!");
> + return -EINVAL;
> + }
> +
> + ret = qm_wait_dev_not_ready(vf_qm);
> + if (ret) {
> + seq_printf(seq, "%s\n", "VF device not ready!");
> + return -EINVAL;

-EBUSY? Again, not sure why we need the seq_printf() in addition to
the error value.

> + }
> +
> + return 0;
> +}
> +
> +static int hisi_acc_vf_debug_cmd(struct seq_file *seq, void *data)
> +{
> + struct device *vf_dev = seq->private;
> + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
> + struct vfio_device *vdev = &core_device->vdev;
> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
> + struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
> + u64 value;
> + int ret;
> +
> + mutex_lock(&hisi_acc_vdev->enable_mutex);
> + ret = hisi_acc_vf_debug_check(seq, vdev);
> + if (ret) {
> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
> + return 0;

Why do we squash the error return here and throughout?

> + }
> +
> + value = readl(vf_qm->io_base + QM_MB_CMD_SEND_BASE);
> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
> + seq_printf(seq, "%s:0x%llx\n", "mailbox cmd channel state is OK", value);

We didn't test the value, what makes the state OK? Can this readl() or
those in qm_wait_dev_not_ready() interfere with the main device flow?

> +
> + return 0;
> +}
> +
> +static int hisi_acc_vf_debug_save(struct seq_file *seq, void *data)
> +{
> + struct device *vf_dev = seq->private;
> + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
> + struct vfio_device *vdev = &core_device->vdev;
> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
> + struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->debug_migf;
> + struct acc_vf_data *vf_data = &migf->vf_data;
> + int ret;
> +
> + mutex_lock(&hisi_acc_vdev->enable_mutex);
> + ret = hisi_acc_vf_debug_check(seq, vdev);
> + if (ret) {
> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
> + return 0;
> + }
> +
> + vf_data->vf_qm_state = QM_READY;
> + ret = vf_qm_read_data(&hisi_acc_vdev->vf_qm, vf_data);
> + if (ret) {
> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
> + seq_printf(seq, "%s\n", "failed to save device data!");
> + return 0;
> + }
> +
> + migf->total_length = sizeof(struct acc_vf_data);
> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
> + seq_printf(seq, "%s\n", "successful to save device data!");
> +
> + return 0;
> +}
> +
> +static int hisi_acc_vf_data_read(struct seq_file *seq, void *data)
> +{
> + struct device *vf_dev = seq->private;
> + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
> + struct vfio_device *vdev = &core_device->vdev;
> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
> + struct hisi_acc_vf_migration_file *debug_migf = hisi_acc_vdev->debug_migf;
> + size_t vf_data_sz = offsetofend(struct acc_vf_data, padding);
> +
> + if (debug_migf && debug_migf->total_length)
> + seq_hex_dump(seq, "Mig Data:", DUMP_PREFIX_OFFSET, 16, 1,
> + (unsigned char *)&debug_migf->vf_data,
> + vf_data_sz, false);
> + else
> + seq_printf(seq, "%s\n", "device not migrated!");

"device state not saved"? Although I don't recall why this doesn't
just return an errno.

> +
> + return 0;
> +}
> +
> +static int hisi_acc_vf_attr_read(struct seq_file *seq, void *data)
> +{
> + struct device *vf_dev = seq->private;
> + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
> + struct vfio_device *vdev = &core_device->vdev;
> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
> + struct hisi_acc_vf_migration_file *debug_migf = hisi_acc_vdev->debug_migf;
> +
> + if (debug_migf && debug_migf->total_length) {
> + seq_printf(seq,
> + "acc device:\n"
> + "device state: %d\n"
> + "device ready: %u\n"
> + "data valid: %d\n"
> + "data size: %lu\n",
> + hisi_acc_vdev->mig_state,

This is redundant to migration/state, however note
hisi_acc_vfio_pci_get_device_state() protects the value with state
mutex while reading it.

> + hisi_acc_vdev->vf_qm_state,

What's the purpose of this, it's ready or not, what does that mean?

> + debug_migf->disabled,

What's the purpose of this?

> + debug_migf->total_length);

Why not just have this printed or inferred via the above data_read
function, this all seems unnecessary.

> + } else {
> + seq_printf(seq, "%s\n", "device not migrated!");
> + }
> +
> + return 0;
> +}
> +
> +static int hisi_acc_vfio_debug_init(struct hisi_acc_vf_core_device *hisi_acc_vdev)
> +{
> + struct vfio_device *vdev = &hisi_acc_vdev->core_device.vdev;
> + struct dentry *vfio_dev_migration = NULL;
> + struct dentry *vfio_hisi_acc = NULL;
> + struct device *dev = vdev->dev;
> + void *migf = NULL;
> +
> + if (!debugfs_initialized())
> + return 0;
> +
> + migf = kzalloc(sizeof(struct hisi_acc_vf_migration_file), GFP_KERNEL);
> + if (!migf)
> + return -ENOMEM;
> + hisi_acc_vdev->debug_migf = migf;
> +
> + vfio_dev_migration = debugfs_lookup("migration", vdev->debug_root);

Fails to build without CONFIG_DEBUG_FS=y. There should be a dependency
on CONFIG_VFIO_DEBUGFS here somewhere.

> + if (!vfio_dev_migration) {
> + kfree(migf);

hisi_acc_vdev->debug_migf still points at this freed buffer, and the
return value of this function is not tested, which allows a
use-after-free in all of the below debugfs interfaces.

> + dev_err(dev, "failed to lookup migration debugfs file!\n");
> + return -ENODEV;
> + }
> +
> + vfio_hisi_acc = debugfs_create_dir("hisi_acc", vfio_dev_migration);
> + debugfs_create_devm_seqfile(dev, "data", vfio_hisi_acc,
> + hisi_acc_vf_data_read);
> + debugfs_create_devm_seqfile(dev, "attr", vfio_hisi_acc,
> + hisi_acc_vf_attr_read);
> + debugfs_create_devm_seqfile(dev, "cmd_state", vfio_hisi_acc,
> + hisi_acc_vf_debug_cmd);
> + debugfs_create_devm_seqfile(dev, "save", vfio_hisi_acc,
> + hisi_acc_vf_debug_save);
> +
> + return 0;
> +}
> +
> +static void hisi_acc_vf_debugfs_exit(struct hisi_acc_vf_core_device *hisi_acc_vdev)
> +{
> + if (!debugfs_initialized())
> + return;
> +
> + kfree(hisi_acc_vdev->debug_migf);

Double free if the lookup in init fails.

> +}
> +
> static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
> {
> struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev);
> @@ -1311,9 +1506,11 @@ static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
> return ret;
> }
> hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
> + hisi_acc_vdev->dev_opened = DEV_OPEN;

= true!

> }
>
> vfio_pci_core_finish_enable(vdev);
> +
> return 0;
> }
>
> @@ -1322,7 +1519,10 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev)
> struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev);
> struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
>
> + hisi_acc_vdev->dev_opened = DEV_CLOSE;
> + mutex_lock(&hisi_acc_vdev->enable_mutex);
> iounmap(vf_qm->io_base);
> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
> vfio_pci_core_close_device(core_vdev);
> }
>
> @@ -1413,6 +1613,9 @@ static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device
> ret = vfio_pci_core_register_device(&hisi_acc_vdev->core_device);
> if (ret)
> goto out_put_vdev;
> +
> + if (ops == &hisi_acc_vfio_pci_migrn_ops)
> + hisi_acc_vfio_debug_init(hisi_acc_vdev);
> return 0;
>
> out_put_vdev:
> @@ -1425,6 +1628,7 @@ static void hisi_acc_vfio_pci_remove(struct pci_dev *pdev)
> struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
>
> vfio_pci_core_unregister_device(&hisi_acc_vdev->core_device);
> + hisi_acc_vf_debugfs_exit(hisi_acc_vdev);
> vfio_put_device(&hisi_acc_vdev->core_device.vdev);
> }
>
> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> index 7a9dc87627cd..3a20d81d105c 100644
> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> @@ -52,6 +52,11 @@
> #define QM_EQC_DW0 0X8000
> #define QM_AEQC_DW0 0X8020
>
> +enum acc_dev_state {
> + DEV_CLOSE = 0x0,
> + DEV_OPEN,
> +};
> +
> struct acc_vf_data {
> #define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state)
> /* QM match information */
> @@ -114,5 +119,11 @@ struct hisi_acc_vf_core_device {
> int vf_id;
> struct hisi_acc_vf_migration_file *resuming_migf;
> struct hisi_acc_vf_migration_file *saving_migf;
> +
> + /* To make sure the device is enabled */
> + struct mutex enable_mutex;
> + bool dev_opened;
> + /* For debugfs */
> + struct hisi_acc_vf_migration_file *debug_migf;
> };
> #endif /* HISI_ACC_VFIO_PCI_H */


2024-04-12 03:07:26

by liulongfang

[permalink] [raw]
Subject: Re: [PATCH v4 4/4] Documentation: add debugfs description for hisi migration

On 2024/4/5 4:07, Alex Williamson wrote:
> On Tue, 2 Apr 2024 11:24:32 +0800
> Longfang Liu <[email protected]> wrote:
>
>> Add a debugfs document description file to help users understand
>> how to use the hisilicon accelerator live migration driver's
>> debugfs.
>>
>> Update the file paths that need to be maintained in MAINTAINERS
>>
>> Signed-off-by: Longfang Liu <[email protected]>
>> ---
>> .../ABI/testing/debugfs-hisi-migration | 34 +++++++++++++++++++
>> MAINTAINERS | 1 +
>> 2 files changed, 35 insertions(+)
>> create mode 100644 Documentation/ABI/testing/debugfs-hisi-migration
>>
>> diff --git a/Documentation/ABI/testing/debugfs-hisi-migration b/Documentation/ABI/testing/debugfs-hisi-migration
>> new file mode 100644
>> index 000000000000..3d7339276e6f
>> --- /dev/null
>> +++ b/Documentation/ABI/testing/debugfs-hisi-migration
>> @@ -0,0 +1,34 @@
>> +What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/data
>> +Date: Apr 2024
>> +KernelVersion: 6.9
>
> At best 6.10 with a merge window in May.
>
>> +Contact: Longfang Liu <[email protected]>
>> +Description: Read the live migration data of the vfio device.
>> + These data include device status data, queue configuration
>> + data and some task configuration data.
>> + The output format of the data is defined by the live
>> + migration driver.
>
> "Dumps the device debug migration buffer, state must first be saved
> using the 'save' attribute."
>
>> +
>> +What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/attr
>> +Date: Apr 2024
>> +KernelVersion: 6.9
>> +Contact: Longfang Liu <[email protected]>
>> +Description: Read the live migration attributes of the vfio device.
>> + it include device status attributes and data length attributes
>> + The output format of the attributes is defined by the live
>> + migration driver.
>
> AFAICT from the previous patch, this attribute is useless.
>
>> +
>> +What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/cmd_state
>> +Date: Apr 2024
>> +KernelVersion: 6.9
>> +Contact: Longfang Liu <[email protected]>
>> +Description: Used to obtain the device command sending and receiving
>> + channel status. If successful, returns the command value.
>> + If failed, return error log.
>> +
>
> Seems like it statically returns "OK" plus the actual value.
>
>
>> +What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/save
>> +Date: Apr 2024
>> +KernelVersion: 6.9
>> +Contact: Longfang Liu <[email protected]>
>> +Description: Trigger the Hisilicon accelerator device to perform
>> + the state saving operation of live migration through the read
>> + operation, and output the operation log results.
>
> These interfaces are confusing, attr and data only work if there has
> either been a previous save OR the user migration process closed saving
> or resuming fds in the interim, and the user doesn't know which one
> they get. Note that debug_migf isn't even discarded between
> open/close, only cmd and save require the device to be opened by a
> user, data and attr might continue to return data from some previous
> user save, resume, or debugfs save.
>

data: Indicates the device migration data obtained after the migration is completed.
This data is saved in debug_migf. The user reads it with "cat", and it is
presented as a plain hexadecimal dump.

attr: Indicates the configuration parameters of the migration process after the
migration is completed. These parameters are saved in the vfio device and in debug_migf.
The user reads them with "cat", and they are presented as key-value
pairs of the form <attribute name, attribute value>.

save: This is an action. Reading it with "cat" performs a migration save
operation, and the resulting data is written to debug_migf.

There is still a big difference between data and attr, and the data formats are
also different. Not merging makes it easier for users to obtain information.
If you feel confused about save, it is recommended to use migrate_save.

Thanks,
Longfang.

>
>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 7625911ec2f1..8c2d13b13273 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -23072,6 +23072,7 @@ M: Longfang Liu <[email protected]>
>> M: Shameer Kolothum <[email protected]>
>> L: [email protected]
>> S: Maintained
>> +F: Documentation/ABI/testing/debugfs-hisi-migration
>> F: drivers/vfio/pci/hisilicon/
>>
>> VFIO MEDIATED DEVICE DRIVERS
>
> .
>

2024-04-12 03:11:47

by liulongfang

[permalink] [raw]
Subject: Re: [PATCH v4 2/4] hisi_acc_vfio_pci: Create subfunction for data reading

On 2024/4/5 4:07, Alex Williamson wrote:
> On Tue, 2 Apr 2024 11:24:30 +0800
> Longfang Liu <[email protected]> wrote:
>
>> During the live migration process. It needs to obtain various status
>> data of drivers and devices. In order to facilitate calling it in the
>> debugfs function. For all operations that read data from device registers,
>> the driver creates a subfunction.
>> Also fixed the location of address data.
>
> Cédric noted privately and I agree, 1) fixes should be provided in
> separate patches with a Fixes: tag rather than subtly included in a
> minor refactoring, and 2) what does this imply about the existing
> functionality of migration? This would seem to suggest existing
> migration data is bogus if we're offset by a register reading the DMA
> address. The commit log for the Fixes patch should describe this.
>

Okay, the modification of the DMA address offset will be split into
a new patch, and the change will be explained clearly in its commit log.

Thanks,
Longfang.

>>
>> Signed-off-by: Longfang Liu <[email protected]>
>> ---
>> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 56 +++++++++++--------
>> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 3 +
>> 2 files changed, 37 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> index 45351be8e270..bf358ba94b5d 100644
>> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> @@ -486,6 +486,39 @@ static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
>> return 0;
>> }
>>
>> +static int vf_qm_read_data(struct hisi_qm *vf_qm, struct acc_vf_data *vf_data)
>> +{
>> + struct device *dev = &vf_qm->pdev->dev;
>> + int ret;
>> +
>> + ret = qm_get_regs(vf_qm, vf_data);
>> + if (ret)
>> + return -EINVAL;
>> +
>> + /* Every reg is 32 bit, the dma address is 64 bit. */
>> + vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
>> + vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
>> + vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
>> + vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
>> + vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
>> + vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
>> +
>> + /* Through SQC_BT/CQC_BT to get sqc and cqc address */
>> + ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
>> + if (ret) {
>> + dev_err(dev, "failed to read SQC addr!\n");
>> + return -EINVAL;
>> + }
>> +
>> + ret = qm_get_cqc(vf_qm, &vf_data->cqc_dma);
>> + if (ret) {
>> + dev_err(dev, "failed to read CQC addr!\n");
>> + return -EINVAL;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
>> struct hisi_acc_vf_migration_file *migf)
>> {
>> @@ -511,31 +544,10 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
>> return ret;
>> }
>>
>> - ret = qm_get_regs(vf_qm, vf_data);
>> + ret = vf_qm_read_data(vf_qm, vf_data);
>> if (ret)
>> return -EINVAL;
>>
>> - /* Every reg is 32 bit, the dma address is 64 bit. */
>> - vf_data->eqe_dma = vf_data->qm_eqc_dw[1];
>> - vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
>> - vf_data->eqe_dma |= vf_data->qm_eqc_dw[0];
>> - vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1];
>> - vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
>> - vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0];
>> -
>> - /* Through SQC_BT/CQC_BT to get sqc and cqc address */
>> - ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
>> - if (ret) {
>> - dev_err(dev, "failed to read SQC addr!\n");
>> - return -EINVAL;
>> - }
>> -
>> - ret = qm_get_cqc(vf_qm, &vf_data->cqc_dma);
>> - if (ret) {
>> - dev_err(dev, "failed to read CQC addr!\n");
>> - return -EINVAL;
>> - }
>> -
>> migf->total_length = sizeof(struct acc_vf_data);
>> return 0;
>> }
>> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
>> index 5bab46602fad..7a9dc87627cd 100644
>> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
>> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
>> @@ -38,6 +38,9 @@
>> #define QM_REG_ADDR_OFFSET 0x0004
>>
>> #define QM_XQC_ADDR_OFFSET 32U
>> +#define QM_XQC_ADDR_LOW 0x1
>> +#define QM_XQC_ADDR_HIGH 0x2
>> +
>> #define QM_VF_AEQ_INT_MASK 0x0004
>> #define QM_VF_EQ_INT_MASK 0x000c
>> #define QM_IFC_INT_SOURCE_V 0x0020
>
> .
>

2024-04-12 08:29:31

by liulongfang

[permalink] [raw]
Subject: Re: [PATCH v4 3/4] hisi_acc_vfio_pci: register debugfs for hisilicon migration driver

On 2024/4/5 4:07, Alex Williamson wrote:
> On Tue, 2 Apr 2024 11:24:31 +0800
> Longfang Liu <[email protected]> wrote:
>
>> On the debugfs framework of VFIO, if the CONFIG_VFIO_DEBUGFS macro is
>> enabled, the debug function is registered for the live migration driver
>> of the HiSilicon accelerator device.
>>
>> After registering the HiSilicon accelerator device on the debugfs
>> framework of live migration of vfio, a directory file "hisi_acc"
>> of debugfs is created, and then three debug function files are
>> created in this directory:
>>
>> vfio
>> |
>> +---<dev_name1>
>> | +---migration
>> | +--state
>> | +--hisi_acc
>> | +--attr
>> | +--data
>> | +--save
>> | +--cmd_state
>> |
>> +---<dev_name2>
>> +---migration
>> +--state
>> +--hisi_acc
>> +--attr
>> +--data
>> +--save
>> +--cmd_state
>>
>> data file: used to get the migration data from the driver
>> attr file: used to get device attributes parameters from the driver
>> save file: used to read the data of the live migration device and save
>> it to the driver.
>> cmd_state: used to get the cmd channel state for the device.
>>
>> +----------------+ +--------------+ +---------------+
>> | migration dev | | src dev | | dst dev |
>> +-------+--------+ +------+-------+ +-------+-------+
>> | | |
>> | | |
>> | | |
>> | | |
>> save | +------v-------+ +-------v-------+
>> | | saving_mif | | resuming_migf |
>> | | file | | file |
>> | +------+-------+ +-------+-------+
>> | | |
>> | mutex | |
>> +-------v--------+ | |
>> | | | |
>> | debug_migf file<---------------+-----------------------+
>> | | copy
>> +-------+--------+
>> |
>> cat |
>> |
>> +-------v--------+
>> | user |
>> +----------------+
>>
>> In debugfs scheme. The driver creates a separate debug_migf file.
>> It is completely separated from the two files of live migration,
>> thus preventing debugfs data from interfering with migration data.
>> Moreover, it only performs read operations on the device.
>>
>> For serialization of debugfs:
>> First, it only writes data when performing a debugfs save operation.
>
> This distinction between "writing" and "copying" is very confusing.
>

"Writing" means reading data from the device and writing it to debug_migf.
"Copying" is to copy the data that has been saved in other migf files to
debug_migf.
The destination of both operations is the same.
The data sources are different.

>> Second, it is only copied from the file on the migration device
>> when the live migration is complete.
>
> Why does it do this at all? If you're looking for a postmortem of the
> user generated buffer, that should be explicitly stated.
>

debug_migf is a data buffer used to cache debugfs data for users.

>> These two operations are mutually exclusive through mutex.
>
> The mutual exclusion between debugfs operations is not the concern, the
> question is whether there's serialization that prevents the debugfs
> operations from interfering with the user migration flow. Nothing here
> seems to prevent concurrent use of the debugfs interface proposed here
> with a user migration.
>

Reading data directly from the device does not cause any problems.
The device supports concurrent requests for read operations.
Therefore, there is no mutual exclusion between the debugfs save
operation and the user migration operation.

>>
>> Signed-off-by: Longfang Liu <[email protected]>
>> ---
>> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 204 ++++++++++++++++++
>> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 11 +
>> 2 files changed, 215 insertions(+)
>>
>> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> index bf358ba94b5d..9f563a31a2a1 100644
>> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> @@ -15,6 +15,7 @@
>> #include <linux/anon_inodes.h>
>>
>> #include "hisi_acc_vfio_pci.h"
>> +#include "../../vfio.h"
>
> This include seems not to be required.
>

OK, let me modify it and verify it.

>>
>> /* Return 0 on VM acc device ready, -ETIMEDOUT hardware timeout */
>> static int qm_wait_dev_not_ready(struct hisi_qm *qm)
>> @@ -618,6 +619,22 @@ hisi_acc_check_int_state(struct hisi_acc_vf_core_device *hisi_acc_vdev)
>> }
>> }
>>
>> +static void hisi_acc_vf_migf_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
>> + struct hisi_acc_vf_migration_file *src_migf)
>
> Seems this should be named something relative to debug since it's only
> purpose is to copy the migration file to the debug migration file.

How about hisi_acc_debug_migf_copy?

>
>> +{
>> + struct hisi_acc_vf_migration_file *dst_migf = hisi_acc_vdev->debug_migf;
>> +
>> + if (!dst_migf)
>> + return;
>> +
>> + mutex_lock(&hisi_acc_vdev->enable_mutex);
>> + dst_migf->disabled = src_migf->disabled;
>
> In the cases where this is called, the caller is about to call
> hisi_acc_vf_disable_fd() which sets disabled = true and then
> hisi_acc_vf_debug_save() doesn't touch the value! What does it even
> mean to copy this value, let alone print it as part of the debugfs
> output later?
>

Yes, the disable assignment of debug_migf needs to be processed
in hisi_acc_vf_debug_save.

>
>> + dst_migf->total_length = src_migf->total_length;
>> + memcpy(&dst_migf->vf_data, &src_migf->vf_data,
>> + sizeof(struct acc_vf_data));
>> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
>> +}
>> +
>> static void hisi_acc_vf_disable_fd(struct hisi_acc_vf_migration_file *migf)
>> {
>> mutex_lock(&migf->lock);
>> @@ -630,12 +647,14 @@ static void hisi_acc_vf_disable_fd(struct hisi_acc_vf_migration_file *migf)
>> static void hisi_acc_vf_disable_fds(struct hisi_acc_vf_core_device *hisi_acc_vdev)
>> {
>> if (hisi_acc_vdev->resuming_migf) {
>> + hisi_acc_vf_migf_save(hisi_acc_vdev, hisi_acc_vdev->resuming_migf);
>> hisi_acc_vf_disable_fd(hisi_acc_vdev->resuming_migf);
>> fput(hisi_acc_vdev->resuming_migf->filp);
>> hisi_acc_vdev->resuming_migf = NULL;
>> }
>>
>> if (hisi_acc_vdev->saving_migf) {
>> + hisi_acc_vf_migf_save(hisi_acc_vdev, hisi_acc_vdev->saving_migf);
>
> Why are these buffers copied to the debug_migf in this case? This can
> happen asynchronous to accessing the debugfs migration file and there's
> no serialization.
>

We can try to copy when accessing debugfs.

>> hisi_acc_vf_disable_fd(hisi_acc_vdev->saving_migf);
>> fput(hisi_acc_vdev->saving_migf->filp);
>> hisi_acc_vdev->saving_migf = NULL;
>> @@ -1144,6 +1163,7 @@ static int hisi_acc_vf_qm_init(struct hisi_acc_vf_core_device *hisi_acc_vdev)
>> if (!vf_qm->io_base)
>> return -EIO;
>>
>> + mutex_init(&hisi_acc_vdev->enable_mutex);
>> vf_qm->fun_type = QM_HW_VF;
>> vf_qm->pdev = vf_dev;
>> mutex_init(&vf_qm->mailbox_lock);
>> @@ -1294,6 +1314,181 @@ static long hisi_acc_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int
>> return vfio_pci_core_ioctl(core_vdev, cmd, arg);
>> }
>>
>> +static int hisi_acc_vf_debug_check(struct seq_file *seq, struct vfio_device *vdev)
>> +{
>> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
>> + struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->debug_migf;
>> + struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
>> + int ret;
>> +
>
> lockdep_assert_held(...)
>
>> + if (!vdev->mig_ops || !migf) {
>> + seq_printf(seq, "%s\n", "device does not support live migration!");
>> + return -EINVAL;
>
> Isn't the -EINVAL sufficient?
>

Which one do you think is better?

>> + }
>> +
>> + /**
>> + * When the device is not opened, the io_base is not mapped.
>> + * The driver cannot perform device read and write operations.
>> + */
>> + if (hisi_acc_vdev->dev_opened != DEV_OPEN) {
>
> Why are we assigning and testing a bool against and enum?!
>

OK, change to true and false assignment.

>> + seq_printf(seq, "%s\n", "device not opened!");
>> + return -EINVAL;
>> + }
>> +
>> + ret = qm_wait_dev_not_ready(vf_qm);
>> + if (ret) {
>> + seq_printf(seq, "%s\n", "VF device not ready!");
>> + return -EINVAL;
>
> -EBUSY? Again, not sure why we need the seq_printf() in addition to
> the error value.
>

OK, -EBUSY is better.
seq_printf() allows users to directly obtain the cause of the
error without checking dmesg.


>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int hisi_acc_vf_debug_cmd(struct seq_file *seq, void *data)
>> +{
>> + struct device *vf_dev = seq->private;
>> + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
>> + struct vfio_device *vdev = &core_device->vdev;
>> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
>> + struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
>> + u64 value;
>> + int ret;
>> +
>> + mutex_lock(&hisi_acc_vdev->enable_mutex);
>> + ret = hisi_acc_vf_debug_check(seq, vdev);
>> + if (ret) {
>> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
>> + return 0;
>
> Why do we squash the error return here and throughout?
>
seq_printf() gives the user a failure message.
This can be changed to "return ret;"

>> + }
>> +
>> + value = readl(vf_qm->io_base + QM_MB_CMD_SEND_BASE);
>> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
>> + seq_printf(seq, "%s:0x%llx\n", "mailbox cmd channel state is OK", value);
>
> We didn't test the value, what makes the state OK? Can this readl() or
> those in qm_wait_dev_not_ready() interfere with the main device flow?
>

If the cmd channel is working normally, the read returns a value that is
not all Fs. I will add a check for the abnormal case in the next version.

The cmd channel read operation does not affect the main migration process.

>> +
>> + return 0;
>> +}
>> +
>> +static int hisi_acc_vf_debug_save(struct seq_file *seq, void *data)
>> +{
>> + struct device *vf_dev = seq->private;
>> + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
>> + struct vfio_device *vdev = &core_device->vdev;
>> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
>> + struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->debug_migf;
>> + struct acc_vf_data *vf_data = &migf->vf_data;
>> + int ret;
>> +
>> + mutex_lock(&hisi_acc_vdev->enable_mutex);
>> + ret = hisi_acc_vf_debug_check(seq, vdev);
>> + if (ret) {
>> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
>> + return 0;
>> + }
>> +
>> + vf_data->vf_qm_state = QM_READY;
>> + ret = vf_qm_read_data(&hisi_acc_vdev->vf_qm, vf_data);
>> + if (ret) {
>> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
>> + seq_printf(seq, "%s\n", "failed to save device data!");
>> + return 0;
>> + }
>> +
>> + migf->total_length = sizeof(struct acc_vf_data);
>> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
>> + seq_printf(seq, "%s\n", "successful to save device data!");
>> +
>> + return 0;
>> +}
>> +
>> +static int hisi_acc_vf_data_read(struct seq_file *seq, void *data)
>> +{
>> + struct device *vf_dev = seq->private;
>> + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
>> + struct vfio_device *vdev = &core_device->vdev;
>> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
>> + struct hisi_acc_vf_migration_file *debug_migf = hisi_acc_vdev->debug_migf;
>> + size_t vf_data_sz = offsetofend(struct acc_vf_data, padding);
>> +
>> + if (debug_migf && debug_migf->total_length)
>> + seq_hex_dump(seq, "Mig Data:", DUMP_PREFIX_OFFSET, 16, 1,
>> + (unsigned char *)&debug_migf->vf_data,
>> + vf_data_sz, false);
>> + else
>> + seq_printf(seq, "%s\n", "device not migrated!");
>
> "device state not saved"? Although I don't recall why this doesn't
> just return an errno.
>

OK, the paths that exit without any migration data will be treated as
errors, and an error code will be returned.

>> +
>> + return 0;
>> +}
>> +
>> +static int hisi_acc_vf_attr_read(struct seq_file *seq, void *data)
>> +{
>> + struct device *vf_dev = seq->private;
>> + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
>> + struct vfio_device *vdev = &core_device->vdev;
>> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
>> + struct hisi_acc_vf_migration_file *debug_migf = hisi_acc_vdev->debug_migf;
>> +
>> + if (debug_migf && debug_migf->total_length) {
>> + seq_printf(seq,
>> + "acc device:\n"
>> + "device state: %d\n"
>> + "device ready: %u\n"
>> + "data valid: %d\n"
>> + "data size: %lu\n",
>> + hisi_acc_vdev->mig_state,
>
> This is redundant to migration/state, however note

OK!

> hisi_acc_vfio_pci_get_device_state() protects the value with state
> mutex while reading it.
>
>> + hisi_acc_vdev->vf_qm_state,
>
> What's the purpose of this, it's ready or not, what does that mean?
>

If this status is not ready, the live migration process will exit directly.
It indicates one of two possible exceptions:
1. The acc device driver has not been loaded (insmod) in the VM.
2. The acc device driver is loaded in the VM, but the cmd channel is abnormal.

>> + debug_migf->disabled,
>
> What's the purpose of this?

Get the enable status of migf file in the driver.

>
>> + debug_migf->total_length);
>
> Why not just have this printed or inferred via the above data_read
> function, this all seems unnecessary.
>

This is used to obtain some key status values of the live migration driver.
When locating problems, they are more important than the raw data,
so they are output as key-value pairs.

The above data is directly output in the form of hexadecimal data.
It is used for more detailed analysis when there are no abnormalities
in the key status.

>> + } else {
>> + seq_printf(seq, "%s\n", "device not migrated!");
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int hisi_acc_vfio_debug_init(struct hisi_acc_vf_core_device *hisi_acc_vdev)
>> +{
>> + struct vfio_device *vdev = &hisi_acc_vdev->core_device.vdev;
>> + struct dentry *vfio_dev_migration = NULL;
>> + struct dentry *vfio_hisi_acc = NULL;
>> + struct device *dev = vdev->dev;
>> + void *migf = NULL;
>> +
>> + if (!debugfs_initialized())
>> + return 0;
>> +
>> + migf = kzalloc(sizeof(struct hisi_acc_vf_migration_file), GFP_KERNEL);
>> + if (!migf)
>> + return -ENOMEM;
>> + hisi_acc_vdev->debug_migf = migf;
>> +
>> + vfio_dev_migration = debugfs_lookup("migration", vdev->debug_root);
>
> Fails to build without CONFIG_DEBUG_FS=y There should be a dependency
> on CONFIG_VFIO_DEBUGFS here somewhere.
>

Yes, the driver needs to add a "!IS_ENABLED(CONFIG_VFIO_DEBUGFS)" check
after the "debugfs_initialized()" check above.

>> + if (!vfio_dev_migration) {
>> + kfree(migf);
>
> hisi_acc_vdev->debug_migf still points at this freed buffer, the return
> value of this function is not tested, allows a use-after-free in
> all of the below debugfs interfaces.
>

Yes, "hisi_acc_vdev->debug_migf = NULL" needs to be added there.

>> + dev_err(dev, "failed to lookup migration debugfs file!\n");
>> + return -ENODEV;
>> + }
>> +
>> + vfio_hisi_acc = debugfs_create_dir("hisi_acc", vfio_dev_migration);
>> + debugfs_create_devm_seqfile(dev, "data", vfio_hisi_acc,
>> + hisi_acc_vf_data_read);
>> + debugfs_create_devm_seqfile(dev, "attr", vfio_hisi_acc,
>> + hisi_acc_vf_attr_read);
>> + debugfs_create_devm_seqfile(dev, "cmd_state", vfio_hisi_acc,
>> + hisi_acc_vf_debug_cmd);
>> + debugfs_create_devm_seqfile(dev, "save", vfio_hisi_acc,
>> + hisi_acc_vf_debug_save);
>> +
>> + return 0;
>> +}
>> +
>> +static void hisi_acc_vf_debugfs_exit(struct hisi_acc_vf_core_device *hisi_acc_vdev)
>> +{
>> + if (!debugfs_initialized())
>> + return;
>> +
>> + kfree(hisi_acc_vdev->debug_migf);
>
> Double free if the lookup in init fails.
>

After adding "hisi_acc_vdev->debug_migf = NULL" above,
the following check needs to be added here:

if (hisi_acc_vdev->debug_migf)
kfree(hisi_acc_vdev->debug_migf);

>> +}
>> +
>> static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
>> {
>> struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev);
>> @@ -1311,9 +1506,11 @@ static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
>> return ret;
>> }
>> hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
>> + hisi_acc_vdev->dev_opened = DEV_OPEN;
>
> = true!
>

OK, the next version will not use enumeration values; it will assign true/false instead.

>> }
>>
>> vfio_pci_core_finish_enable(vdev);
>> +
>> return 0;
>> }
>>
>> @@ -1322,7 +1519,10 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev)
>> struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev);
>> struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
>>
>> + hisi_acc_vdev->dev_opened = DEV_CLOSE;
>> + mutex_lock(&hisi_acc_vdev->enable_mutex);
>> iounmap(vf_qm->io_base);
>> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
>> vfio_pci_core_close_device(core_vdev);
>> }
>>
>> @@ -1413,6 +1613,9 @@ static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device
>> ret = vfio_pci_core_register_device(&hisi_acc_vdev->core_device);
>> if (ret)
>> goto out_put_vdev;
>> +
>> + if (ops == &hisi_acc_vfio_pci_migrn_ops)
>> + hisi_acc_vfio_debug_init(hisi_acc_vdev);
>> return 0;
>>
>> out_put_vdev:
>> @@ -1425,6 +1628,7 @@ static void hisi_acc_vfio_pci_remove(struct pci_dev *pdev)
>> struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
>>
>> vfio_pci_core_unregister_device(&hisi_acc_vdev->core_device);
>> + hisi_acc_vf_debugfs_exit(hisi_acc_vdev);
>> vfio_put_device(&hisi_acc_vdev->core_device.vdev);
>> }
>>
>> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
>> index 7a9dc87627cd..3a20d81d105c 100644
>> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
>> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
>> @@ -52,6 +52,11 @@
>> #define QM_EQC_DW0 0X8000
>> #define QM_AEQC_DW0 0X8020
>>
>> +enum acc_dev_state {
>> + DEV_CLOSE = 0x0,
>> + DEV_OPEN,
>> +};
>> +
>> struct acc_vf_data {
>> #define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state)
>> /* QM match information */
>> @@ -114,5 +119,11 @@ struct hisi_acc_vf_core_device {
>> int vf_id;
>> struct hisi_acc_vf_migration_file *resuming_migf;
>> struct hisi_acc_vf_migration_file *saving_migf;
>> +
>> + /* To make sure the device is enabled */
>> + struct mutex enable_mutex;
>> + bool dev_opened;
>> + /* For debugfs */
>> + struct hisi_acc_vf_migration_file *debug_migf;
>> };
>> #endif /* HISI_ACC_VFIO_PCI_H */
>
> .
>
Thank you very much for your careful review.
I will address the issues you mentioned above one by one
and post the fixes in the next version.

Thanks again!
Longfang.

2024-04-24 03:16:51

by liulongfang

[permalink] [raw]
Subject: Re: [PATCH v4 3/4] hisi_acc_vfio_pci: register debugfs for hisilicon migration driver

On 2024/4/12 16:15, liulongfang wrote:
> On 2024/4/5 4:07, Alex Williamson wrote:
>> On Tue, 2 Apr 2024 11:24:31 +0800
>> Longfang Liu <[email protected]> wrote:
>>
>>> On the debugfs framework of VFIO, if the CONFIG_VFIO_DEBUGFS macro is
>>> enabled, the debug function is registered for the live migration driver
>>> of the HiSilicon accelerator device.
>>>
>>> After registering the HiSilicon accelerator device on the debugfs
>>> framework of live migration of vfio, a directory file "hisi_acc"
>>> of debugfs is created, and then three debug function files are
>>> created in this directory:
>>>
>>> vfio
>>> |
>>> +---<dev_name1>
>>> | +---migration
>>> | +--state
>>> | +--hisi_acc
>>> | +--attr
>>> | +--data
>>> | +--save
>>> | +--cmd_state
>>> |
>>> +---<dev_name2>
>>> +---migration
>>> +--state
>>> +--hisi_acc
>>> +--attr
>>> +--data
>>> +--save
>>> +--cmd_state
>>>
>>> data file: used to get the migration data from the driver
>>> attr file: used to get device attributes parameters from the driver
>>> save file: used to read the data of the live migration device and save
>>> it to the driver.
>>> cmd_state: used to get the cmd channel state for the device.
>>>
>>> +----------------+ +--------------+ +---------------+
>>> | migration dev | | src dev | | dst dev |
>>> +-------+--------+ +------+-------+ +-------+-------+
>>> | | |
>>> | | |
>>> | | |
>>> | | |
>>> save | +------v-------+ +-------v-------+
>>> | | saving_mif | | resuming_migf |
>>> | | file | | file |
>>> | +------+-------+ +-------+-------+
>>> | | |
>>> | mutex | |
>>> +-------v--------+ | |
>>> | | | |
>>> | debug_migf file<---------------+-----------------------+
>>> | | copy
>>> +-------+--------+
>>> |
>>> cat |
>>> |
>>> +-------v--------+
>>> | user |
>>> +----------------+
>>>
>>> In debugfs scheme. The driver creates a separate debug_migf file.
>>> It is completely separated from the two files of live migration,
>>> thus preventing debugfs data from interfering with migration data.
>>> Moreover, it only performs read operations on the device.
>>>
>>> For serialization of debugfs:
>>> First, it only writes data when performing a debugfs save operation.
>>
>> This distinction between "writing" and "copying" is very confusing.
>>
>
> "Writing" means reading data from the device and writing it to debug_migf.
> "Copying" is to copy the data that has been saved in other migf files to
> debug_migf.
> The destination of both operations is the same.
> The data sources are different.
>
>>> Second, it is only copied from the file on the migration device
>>> when the live migration is complete.
>>
>> Why does it do this at all? If you're looking for a postmortem of the
>> user generated buffer, that should be explicitly stated.
>>
>
> debug_migf is a data buffer. Used to cache debugfs data for users
>
>>> These two operations are mutually exclusive through mutex.
>>
>> The mutual exclusion between debugfs operations is not the concern, the
>> question is whether there's serialization that prevents the debugfs
>> operations from interfering with the user migration flow. Nothing here
>> seems to prevent concurrent use of the debugfs interface proposed here
>> with a user migration.
>>
>
> Reading data directly from the device does not cause any problems.
> The device supports concurrent requests for read operations.
> Therefore, there is no mutual exclusion between the debugfs save
> operation and the user migration operation.
>
>>>
>>> Signed-off-by: Longfang Liu <[email protected]>
>>> ---
>>> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 204 ++++++++++++++++++
>>> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 11 +
>>> 2 files changed, 215 insertions(+)
>>>
>>> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>>> index bf358ba94b5d..9f563a31a2a1 100644
>>> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>>> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>>> @@ -15,6 +15,7 @@
>>> #include <linux/anon_inodes.h>
>>>
>>> #include "hisi_acc_vfio_pci.h"
>>> +#include "../../vfio.h"
>>
>> This include seems not to be required.
>>
>
> OK, let me modify it and verify it.
>
>>>
>>> /* Return 0 on VM acc device ready, -ETIMEDOUT hardware timeout */
>>> static int qm_wait_dev_not_ready(struct hisi_qm *qm)
>>> @@ -618,6 +619,22 @@ hisi_acc_check_int_state(struct hisi_acc_vf_core_device *hisi_acc_vdev)
>>> }
>>> }
>>>
>>> +static void hisi_acc_vf_migf_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
>>> + struct hisi_acc_vf_migration_file *src_migf)
>>
>> Seems this should be named something relative to debug since it's only
>> purpose is to copy the migration file to the debug migration file.
>
> How about hisi_acc_debug_migf_copy?
>
>>
>>> +{
>>> + struct hisi_acc_vf_migration_file *dst_migf = hisi_acc_vdev->debug_migf;
>>> +
>>> + if (!dst_migf)
>>> + return;
>>> +
>>> + mutex_lock(&hisi_acc_vdev->enable_mutex);
>>> + dst_migf->disabled = src_migf->disabled;
>>
>> In the cases where this is called, the caller is about to call
>> hisi_acc_vf_disable_fd() which sets disabled = true and then
>> hisi_acc_vf_debug_save() doesn't touch the value! What does it even
>> mean to copy this value, let alone print it as part of the debugfs
>> output later?
>>
>
> Yes, the disable assignment of debug_migf needs to be processed
> in hisi_acc_vf_debug_save.
>
>>
>>> + dst_migf->total_length = src_migf->total_length;
>>> + memcpy(&dst_migf->vf_data, &src_migf->vf_data,
>>> + sizeof(struct acc_vf_data));
>>> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
>>> +}
>>> +
>>> static void hisi_acc_vf_disable_fd(struct hisi_acc_vf_migration_file *migf)
>>> {
>>> mutex_lock(&migf->lock);
>>> @@ -630,12 +647,14 @@ static void hisi_acc_vf_disable_fd(struct hisi_acc_vf_migration_file *migf)
>>> static void hisi_acc_vf_disable_fds(struct hisi_acc_vf_core_device *hisi_acc_vdev)
>>> {
>>> if (hisi_acc_vdev->resuming_migf) {
>>> + hisi_acc_vf_migf_save(hisi_acc_vdev, hisi_acc_vdev->resuming_migf);
>>> hisi_acc_vf_disable_fd(hisi_acc_vdev->resuming_migf);
>>> fput(hisi_acc_vdev->resuming_migf->filp);
>>> hisi_acc_vdev->resuming_migf = NULL;
>>> }
>>>
>>> if (hisi_acc_vdev->saving_migf) {
>>> + hisi_acc_vf_migf_save(hisi_acc_vdev, hisi_acc_vdev->saving_migf);
>>
>> Why are these buffers copied to the debug_migf in this case? This can
>> happen asynchronous to accessing the debugfs migration file and there's
>> no serialization.
>>
>
> We can try to copy when accessing debugfs.
>

I tried copying the data from the migration files when debugfs is accessed.
This approach does not work: resuming_migf and saving_migf are freed (kfree)
once the migration completes, so their data is no longer available at
that point.
Therefore, the data needs to be saved to debug_migf when calling "hisi_acc_vf_disable_fd".

In addition, the migration status data of the device can be read directly during
debugfs access, without using debug_migf.

Thanks,
Longfang.

>>> hisi_acc_vf_disable_fd(hisi_acc_vdev->saving_migf);
>>> fput(hisi_acc_vdev->saving_migf->filp);
>>> hisi_acc_vdev->saving_migf = NULL;
>>> @@ -1144,6 +1163,7 @@ static int hisi_acc_vf_qm_init(struct hisi_acc_vf_core_device *hisi_acc_vdev)
>>> if (!vf_qm->io_base)
>>> return -EIO;
>>>
>>> + mutex_init(&hisi_acc_vdev->enable_mutex);
>>> vf_qm->fun_type = QM_HW_VF;
>>> vf_qm->pdev = vf_dev;
>>> mutex_init(&vf_qm->mailbox_lock);
>>> @@ -1294,6 +1314,181 @@ static long hisi_acc_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int
>>> return vfio_pci_core_ioctl(core_vdev, cmd, arg);
>>> }
>>>
>>> +static int hisi_acc_vf_debug_check(struct seq_file *seq, struct vfio_device *vdev)
>>> +{
>>> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
>>> + struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->debug_migf;
>>> + struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
>>> + int ret;
>>> +
>>
>> lockdep_assert_held(...)
>>
>>> + if (!vdev->mig_ops || !migf) {
>>> + seq_printf(seq, "%s\n", "device does not support live migration!");
>>> + return -EINVAL;
>>
>> Isn't the -EINVAL sufficient?
>>
>
> Which one do you think is better?
>
>>> + }
>>> +
>>> + /**
>>> + * When the device is not opened, the io_base is not mapped.
>>> + * The driver cannot perform device read and write operations.
>>> + */
>>> + if (hisi_acc_vdev->dev_opened != DEV_OPEN) {
>>
>> Why are we assigning and testing a bool against and enum?!
>>
>
> OK, change to true and false assignment.
>
>>> + seq_printf(seq, "%s\n", "device not opened!");
>>> + return -EINVAL;
>>> + }
>>> +
>>> + ret = qm_wait_dev_not_ready(vf_qm);
>>> + if (ret) {
>>> + seq_printf(seq, "%s\n", "VF device not ready!");
>>> + return -EINVAL;
>>
>> -EBUSY? Again, not sure why we need the seq_printf() in addition to
>> the error value.
>>
>
> OK, -EBUSY is better.
> seq_printf() allows users to directly obtain the cause of the
> error without checking dmesg.
>
>
>>> + }
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int hisi_acc_vf_debug_cmd(struct seq_file *seq, void *data)
>>> +{
>>> + struct device *vf_dev = seq->private;
>>> + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
>>> + struct vfio_device *vdev = &core_device->vdev;
>>> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
>>> + struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
>>> + u64 value;
>>> + int ret;
>>> +
>>> + mutex_lock(&hisi_acc_vdev->enable_mutex);
>>> + ret = hisi_acc_vf_debug_check(seq, vdev);
>>> + if (ret) {
>>> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
>>> + return 0;
>>
>> Why do we squash the error return here and throughout?
>>
> seq_printf() gives the user a failure message.
> This can be changed to "return ret;"
>
>>> + }
>>> +
>>> + value = readl(vf_qm->io_base + QM_MB_CMD_SEND_BASE);
>>> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
>>> + seq_printf(seq, "%s:0x%llx\n", "mailbox cmd channel state is OK", value);
>>
>> We didn't test the value, what makes the state OK? Can this readl() or
>> those in qm_wait_dev_not_ready() interfere with the main device flow?
>>
>
> If the cmd channel is normal, it will return a non-all-F value.
> Add exception checking in the next version.
>
> cmd channel read operation will not affect the main migration process.
>
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int hisi_acc_vf_debug_save(struct seq_file *seq, void *data)
>>> +{
>>> + struct device *vf_dev = seq->private;
>>> + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
>>> + struct vfio_device *vdev = &core_device->vdev;
>>> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
>>> + struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->debug_migf;
>>> + struct acc_vf_data *vf_data = &migf->vf_data;
>>> + int ret;
>>> +
>>> + mutex_lock(&hisi_acc_vdev->enable_mutex);
>>> + ret = hisi_acc_vf_debug_check(seq, vdev);
>>> + if (ret) {
>>> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
>>> + return 0;
>>> + }
>>> +
>>> + vf_data->vf_qm_state = QM_READY;
>>> + ret = vf_qm_read_data(&hisi_acc_vdev->vf_qm, vf_data);
>>> + if (ret) {
>>> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
>>> + seq_printf(seq, "%s\n", "failed to save device data!");
>>> + return 0;
>>> + }
>>> +
>>> + migf->total_length = sizeof(struct acc_vf_data);
>>> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
>>> + seq_printf(seq, "%s\n", "successful to save device data!");
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int hisi_acc_vf_data_read(struct seq_file *seq, void *data)
>>> +{
>>> + struct device *vf_dev = seq->private;
>>> + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
>>> + struct vfio_device *vdev = &core_device->vdev;
>>> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
>>> + struct hisi_acc_vf_migration_file *debug_migf = hisi_acc_vdev->debug_migf;
>>> + size_t vf_data_sz = offsetofend(struct acc_vf_data, padding);
>>> +
>>> + if (debug_migf && debug_migf->total_length)
>>> + seq_hex_dump(seq, "Mig Data:", DUMP_PREFIX_OFFSET, 16, 1,
>>> + (unsigned char *)&debug_migf->vf_data,
>>> + vf_data_sz, false);
>>> + else
>>> + seq_printf(seq, "%s\n", "device not migrated!");
>>
>> "device state not saved"? Although I don't recall why this doesn't
>> just return an errno.
>>
>
> OK, those who exit directly without migration will be processed according
> to the error mode and an error code will be returned.
>
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int hisi_acc_vf_attr_read(struct seq_file *seq, void *data)
>>> +{
>>> + struct device *vf_dev = seq->private;
>>> + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev);
>>> + struct vfio_device *vdev = &core_device->vdev;
>>> + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev);
>>> + struct hisi_acc_vf_migration_file *debug_migf = hisi_acc_vdev->debug_migf;
>>> +
>>> + if (debug_migf && debug_migf->total_length) {
>>> + seq_printf(seq,
>>> + "acc device:\n"
>>> + "device state: %d\n"
>>> + "device ready: %u\n"
>>> + "data valid: %d\n"
>>> + "data size: %lu\n",
>>> + hisi_acc_vdev->mig_state,
>>
>> This is redundant to migration/state, however note
>
> OK!
>
>> hisi_acc_vfio_pci_get_device_state() protects the value with state
>> mutex while reading it.
>>
>>> + hisi_acc_vdev->vf_qm_state,
>>
>> What's the purpose of this, it's ready or not, what does that mean?
>>
>
> If this status is not ready, the live migration process will exit directly.
> It indicates that there are two possible exceptions:
> 1. The acc device driver in the VM does not have insmod.
> 2. The acc device driver in the VM is insmoded, but the cmd channel is abnormal.
>
>>> + debug_migf->disabled,
>>
>> What's the purpose of this?
>
> Get the enable status of migf file in the driver.
>
>>
>>> + debug_migf->total_length);
>>
>> Why not just have this printed or inferred via the above data_read
>> function, this all seems unnecessary.
>>
>
> This is used to obtain some key status of the live migration driver.
> It is more important than data in problem location.
> So it is output in key-value pairs.
>
> The above data is directly output in the form of hexadecimal data.
> It is used for more detailed analysis when there are no abnormalities
> in the key status.
>
>>> + } else {
>>> + seq_printf(seq, "%s\n", "device not migrated!");
>>> + }
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int hisi_acc_vfio_debug_init(struct hisi_acc_vf_core_device *hisi_acc_vdev)
>>> +{
>>> + struct vfio_device *vdev = &hisi_acc_vdev->core_device.vdev;
>>> + struct dentry *vfio_dev_migration = NULL;
>>> + struct dentry *vfio_hisi_acc = NULL;
>>> + struct device *dev = vdev->dev;
>>> + void *migf = NULL;
>>> +
>>> + if (!debugfs_initialized())
>>> + return 0;
>>> +
>>> + migf = kzalloc(sizeof(struct hisi_acc_vf_migration_file), GFP_KERNEL);
>>> + if (!migf)
>>> + return -ENOMEM;
>>> + hisi_acc_vdev->debug_migf = migf;
>>> +
>>> + vfio_dev_migration = debugfs_lookup("migration", vdev->debug_root);
>>
>> Fails to build without CONFIG_DEBUG_FS=y There should be a dependency
>> on CONFIG_VFIO_DEBUGFS here somewhere.
>>
>
> Yes, the driver needs to add "!IS_ENABLED(CONFIG_VFIO_DEBUGFS)"
> behind "debugfs_initialized()" above.
>
>>> + if (!vfio_dev_migration) {
>>> + kfree(migf);
>>
>> hisi_acc_vdev->debug_migf still points at this freed buffer, the return
>> value of this function is not tested, allows a use-after-free in
>> all of the below debugfs interfaces.
>>
>
> Yes, there needs to add "hisi_acc_vdev->debug_migf = NULL"
>
>>> + dev_err(dev, "failed to lookup migration debugfs file!\n");
>>> + return -ENODEV;
>>> + }
>>> +
>>> + vfio_hisi_acc = debugfs_create_dir("hisi_acc", vfio_dev_migration);
>>> + debugfs_create_devm_seqfile(dev, "data", vfio_hisi_acc,
>>> + hisi_acc_vf_data_read);
>>> + debugfs_create_devm_seqfile(dev, "attr", vfio_hisi_acc,
>>> + hisi_acc_vf_attr_read);
>>> + debugfs_create_devm_seqfile(dev, "cmd_state", vfio_hisi_acc,
>>> + hisi_acc_vf_debug_cmd);
>>> + debugfs_create_devm_seqfile(dev, "save", vfio_hisi_acc,
>>> + hisi_acc_vf_debug_save);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static void hisi_acc_vf_debugfs_exit(struct hisi_acc_vf_core_device *hisi_acc_vdev)
>>> +{
>>> + if (!debugfs_initialized())
>>> + return;
>>> +
>>> + kfree(hisi_acc_vdev->debug_migf);
>>
>> Double free if the lookup in init fails.
>>
>
> After adding "hisi_acc_vdev->debug_migf = NULL" above.
> These processing codes need to be added here:
>
> if (hisi_acc_vdev->debug_migf)
> kfree(hisi_acc_vdev->debug_migf);
>
>>> +}
>>> +
>>> static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
>>> {
>>> struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev);
>>> @@ -1311,9 +1506,11 @@ static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
>>> return ret;
>>> }
>>> hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
>>> + hisi_acc_vdev->dev_opened = DEV_OPEN;
>>
>> = true!
>>
>
> OK, the next version will not use enumeration values. will use true/false assignment.
>
>>> }
>>>
>>> vfio_pci_core_finish_enable(vdev);
>>> +
>>> return 0;
>>> }
>>>
>>> @@ -1322,7 +1519,10 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev)
>>> struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev);
>>> struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
>>>
>>> + hisi_acc_vdev->dev_opened = DEV_CLOSE;
>>> + mutex_lock(&hisi_acc_vdev->enable_mutex);
>>> iounmap(vf_qm->io_base);
>>> + mutex_unlock(&hisi_acc_vdev->enable_mutex);
>>> vfio_pci_core_close_device(core_vdev);
>>> }
>>>
>>> @@ -1413,6 +1613,9 @@ static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device
>>> ret = vfio_pci_core_register_device(&hisi_acc_vdev->core_device);
>>> if (ret)
>>> goto out_put_vdev;
>>> +
>>> + if (ops == &hisi_acc_vfio_pci_migrn_ops)
>>> + hisi_acc_vfio_debug_init(hisi_acc_vdev);
>>> return 0;
>>>
>>> out_put_vdev:
>>> @@ -1425,6 +1628,7 @@ static void hisi_acc_vfio_pci_remove(struct pci_dev *pdev)
>>> struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
>>>
>>> vfio_pci_core_unregister_device(&hisi_acc_vdev->core_device);
>>> + hisi_acc_vf_debugfs_exit(hisi_acc_vdev);
>>> vfio_put_device(&hisi_acc_vdev->core_device.vdev);
>>> }
>>>
>>> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
>>> index 7a9dc87627cd..3a20d81d105c 100644
>>> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
>>> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
>>> @@ -52,6 +52,11 @@
>>> #define QM_EQC_DW0 0X8000
>>> #define QM_AEQC_DW0 0X8020
>>>
>>> +enum acc_dev_state {
>>> + DEV_CLOSE = 0x0,
>>> + DEV_OPEN,
>>> +};
>>> +
>>> struct acc_vf_data {
>>> #define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state)
>>> /* QM match information */
>>> @@ -114,5 +119,11 @@ struct hisi_acc_vf_core_device {
>>> int vf_id;
>>> struct hisi_acc_vf_migration_file *resuming_migf;
>>> struct hisi_acc_vf_migration_file *saving_migf;
>>> +
>>> + /* To make sure the device is enabled */
>>> + struct mutex enable_mutex;
>>> + bool dev_opened;
>>> + /* For debugfs */
>>> + struct hisi_acc_vf_migration_file *debug_migf;
>>> };
>>> #endif /* HISI_ACC_VFIO_PCI_H */
>>
>> .
>>
> Thank you very much for your careful inspection.
> I will revise the inspection issues you mentioned above
> one by one and publish them in the next version.
>
> Thanks again!
> Longfang.
>
> .
>