2022-05-17 22:22:44

by Matthew Rosato

[permalink] [raw]
Subject: [PATCH 0/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

As discussed in this thread:

https://lore.kernel.org/kvm/[email protected]/

Let's remove VFIO_GROUP_NOTIFY_SET_KVM and instead assume the association
has already been established prior to device_open. For the types today
that need a KVM (GVT, vfio-ap) these will fail if a KVM is not found.
Looking ahead, vfio-pci-zdev will optionally want the KVM association
(enable hardware assists) but it will not be a hard requirement (still
want to allow other, non-KVM userspace usage).

This is built on top of Jason's group locking series:
https://github.com/jgunthorpe/linux/commits/vfio_group_locking

And tested with s390x-pci (zdev-kvm series) and vfio-ap (no GVT to test
with)

@Jason since it's based on your initial patch if you'd rather a SoB just
let me know.

Matthew Rosato (1):
vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

drivers/gpu/drm/i915/gvt/gvt.h | 2 -
drivers/gpu/drm/i915/gvt/kvmgt.c | 60 +++++------------------
drivers/s390/crypto/vfio_ap_ops.c | 35 +++-----------
drivers/s390/crypto/vfio_ap_private.h | 3 --
drivers/vfio/vfio.c | 68 +++++++++------------------
include/linux/vfio.h | 5 +-
6 files changed, 41 insertions(+), 132 deletions(-)

--
2.27.0



2022-05-18 01:50:58

by Matthew Rosato

[permalink] [raw]
Subject: [PATCH 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

Rather than relying on a notifier for associating the KVM with
the group, let's assume that the association has already been
made prior to device_open. The first time a device is opened
associate the group KVM with the device.

Suggested-by: Jason Gunthorpe <[email protected]>
Signed-off-by: Matthew Rosato <[email protected]>
---
drivers/gpu/drm/i915/gvt/gvt.h | 2 -
drivers/gpu/drm/i915/gvt/kvmgt.c | 60 +++++------------------
drivers/s390/crypto/vfio_ap_ops.c | 35 +++-----------
drivers/s390/crypto/vfio_ap_private.h | 3 --
drivers/vfio/vfio.c | 68 +++++++++------------------
include/linux/vfio.h | 5 +-
6 files changed, 41 insertions(+), 132 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 2af4c83e733c..633acfcf76bf 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -227,8 +227,6 @@ struct intel_vgpu {
struct mutex cache_lock;

struct notifier_block iommu_notifier;
- struct notifier_block group_notifier;
- struct kvm *kvm;
struct work_struct release_work;
atomic_t released;

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 7655ffa97d51..b317ae4cc7d2 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -761,23 +761,6 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}

-static int intel_vgpu_group_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct intel_vgpu *vgpu =
- container_of(nb, struct intel_vgpu, group_notifier);
-
- /* the only action we care about */
- if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
- vgpu->kvm = data;
-
- if (!data)
- schedule_work(&vgpu->release_work);
- }
-
- return NOTIFY_OK;
-}
-
static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
{
struct intel_vgpu *itr;
@@ -789,7 +772,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
if (!itr->attached)
continue;

- if (vgpu->kvm == itr->kvm) {
+ if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
ret = true;
goto out;
}
@@ -806,7 +789,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
int ret;

vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
- vgpu->group_notifier.notifier_call = intel_vgpu_group_notifier;

events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
@@ -817,38 +799,30 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
goto out;
}

- events = VFIO_GROUP_NOTIFY_SET_KVM;
- ret = vfio_register_notifier(vfio_dev, VFIO_GROUP_NOTIFY, &events,
- &vgpu->group_notifier);
- if (ret != 0) {
- gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
- ret);
- goto undo_iommu;
- }
-
ret = -EEXIST;
if (vgpu->attached)
- goto undo_register;
+ goto undo_iommu;

ret = -ESRCH;
- if (!vgpu->kvm || vgpu->kvm->mm != current->mm) {
+ if (!vgpu->vfio_device.kvm ||
+ vgpu->vfio_device.kvm->mm != current->mm) {
gvt_vgpu_err("KVM is required to use Intel vGPU\n");
- goto undo_register;
+ goto undo_iommu;
}

ret = -EEXIST;
if (__kvmgt_vgpu_exist(vgpu))
- goto undo_register;
+ goto undo_iommu;

vgpu->attached = true;
- kvm_get_kvm(vgpu->kvm);

kvmgt_protect_table_init(vgpu);
gvt_cache_init(vgpu);

vgpu->track_node.track_write = kvmgt_page_track_write;
vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
- kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node);
+ kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
+ &vgpu->track_node);

debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
&vgpu->nr_cache_entries);
@@ -858,10 +832,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
atomic_set(&vgpu->released, 0);
return 0;

-undo_register:
- vfio_unregister_notifier(vfio_dev, VFIO_GROUP_NOTIFY,
- &vgpu->group_notifier);
-
undo_iommu:
vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
&vgpu->iommu_notifier);
@@ -898,21 +868,15 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
drm_WARN(&i915->drm, ret,
"vfio_unregister_notifier for iommu failed: %d\n", ret);

- ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_GROUP_NOTIFY,
- &vgpu->group_notifier);
- drm_WARN(&i915->drm, ret,
- "vfio_unregister_notifier for group failed: %d\n", ret);
-
debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));

- kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node);
- kvm_put_kvm(vgpu->kvm);
+ kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
+ &vgpu->track_node);
kvmgt_protect_table_destroy(vgpu);
gvt_cache_destroy(vgpu);

intel_vgpu_release_msi_eventfd_ctx(vgpu);

- vgpu->kvm = NULL;
vgpu->attached = false;
}

@@ -1713,7 +1677,7 @@ static struct mdev_driver intel_vgpu_mdev_driver = {

int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
{
- struct kvm *kvm = info->kvm;
+ struct kvm *kvm = info->vfio_device.kvm;
struct kvm_memory_slot *slot;
int idx;

@@ -1743,7 +1707,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)

int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
{
- struct kvm *kvm = info->kvm;
+ struct kvm *kvm = info->vfio_device.kvm;
struct kvm_memory_slot *slot;
int idx;

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index e8914024f5b1..43c68afe7e87 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1284,25 +1284,6 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
}
}

-static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- int notify_rc = NOTIFY_OK;
- struct ap_matrix_mdev *matrix_mdev;
-
- if (action != VFIO_GROUP_NOTIFY_SET_KVM)
- return NOTIFY_OK;
-
- matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
-
- if (!data)
- vfio_ap_mdev_unset_kvm(matrix_mdev);
- else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
- notify_rc = NOTIFY_DONE;
-
- return notify_rc;
-}
-
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
{
struct device *dev;
@@ -1402,11 +1383,10 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
unsigned long events;
int ret;

- matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
- events = VFIO_GROUP_NOTIFY_SET_KVM;
+ if (!vdev->kvm)
+ return -EPERM;

- ret = vfio_register_notifier(vdev, VFIO_GROUP_NOTIFY, &events,
- &matrix_mdev->group_notifier);
+ ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
if (ret)
return ret;

@@ -1415,12 +1395,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
&matrix_mdev->iommu_notifier);
if (ret)
- goto out_unregister_group;
+ goto err_kvm;
return 0;

-out_unregister_group:
- vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
- &matrix_mdev->group_notifier);
+err_kvm:
+ vfio_ap_mdev_unset_kvm(matrix_mdev);
return ret;
}

@@ -1431,8 +1410,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)

vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
&matrix_mdev->iommu_notifier);
- vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
- &matrix_mdev->group_notifier);
vfio_ap_mdev_unset_kvm(matrix_mdev);
}

diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index 648fcaf8104a..a26efd804d0d 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -81,8 +81,6 @@ struct ap_matrix {
* @node: allows the ap_matrix_mdev struct to be added to a list
* @matrix: the adapters, usage domains and control domains assigned to the
* mediated matrix device.
- * @group_notifier: notifier block used for specifying callback function for
- * handling the VFIO_GROUP_NOTIFY_SET_KVM event
* @iommu_notifier: notifier block used for specifying callback function for
* handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even
* @kvm: the struct holding guest's state
@@ -94,7 +92,6 @@ struct ap_matrix_mdev {
struct vfio_device vdev;
struct list_head node;
struct ap_matrix matrix;
- struct notifier_block group_notifier;
struct notifier_block iommu_notifier;
struct kvm *kvm;
crypto_hook pqap_hook;
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index cfcff7764403..c5d421eda275 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -10,6 +10,7 @@
* Author: Tom Lyon, [email protected]
*/

+#include "linux/kvm_host.h"
#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
@@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device)

mutex_lock(&device->dev_set->lock);
device->open_count++;
+ down_write(&device->group->group_rwsem);
+ if (device->open_count == 1 && device->group->kvm) {
+ device->kvm = device->group->kvm;
+ kvm_get_kvm(device->kvm);
+ }
+ up_write(&device->group->group_rwsem);
+
if (device->open_count == 1 && device->ops->open_device) {
ret = device->ops->open_device(device);
if (ret)
@@ -1123,6 +1131,12 @@ static struct file *vfio_device_open(struct vfio_device *device)
device->ops->close_device(device);
err_undo_count:
device->open_count--;
+ down_write(&device->group->group_rwsem);
+ if (device->open_count == 0 && device->kvm) {
+ kvm_put_kvm(device->kvm);
+ device->kvm = NULL;
+ }
+ up_write(&device->group->group_rwsem);
mutex_unlock(&device->dev_set->lock);
module_put(device->dev->driver->owner);
err_unassign_container:
@@ -1318,6 +1332,12 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
if (device->open_count == 1 && device->ops->close_device)
device->ops->close_device(device);
device->open_count--;
+ down_write(&device->group->group_rwsem);
+ if (device->open_count == 0 && device->kvm) {
+ kvm_put_kvm(device->kvm);
+ device->kvm = NULL;
+ }
+ up_write(&device->group->group_rwsem);
mutex_unlock(&device->dev_set->lock);

module_put(device->dev->driver->owner);
@@ -1726,8 +1746,8 @@ EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
* @file: VFIO group file
* @kvm: KVM to link
*
- * The kvm pointer will be forwarded to all the vfio_device's attached to the
- * VFIO file via the VFIO_GROUP_NOTIFY_SET_KVM notifier.
+ * When a VFIO device is first opened the KVM will be available in
+ * device->kvm if one was associated with the group.
*/
void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
@@ -1738,8 +1758,6 @@ void vfio_file_set_kvm(struct file *file, struct kvm *kvm)

down_write(&group->group_rwsem);
group->kvm = kvm;
- blocking_notifier_call_chain(&group->notifier,
- VFIO_GROUP_NOTIFY_SET_KVM, kvm);
up_write(&group->group_rwsem);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
@@ -2039,42 +2057,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
return ret;
}

-static int vfio_register_group_notifier(struct vfio_group *group,
- unsigned long *events,
- struct notifier_block *nb)
-{
- int ret;
- bool set_kvm = false;
-
- if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
- set_kvm = true;
-
- /* clear known events */
- *events &= ~VFIO_GROUP_NOTIFY_SET_KVM;
-
- /* refuse to continue if still events remaining */
- if (*events)
- return -EINVAL;
-
- ret = blocking_notifier_chain_register(&group->notifier, nb);
- if (ret)
- return ret;
-
- /*
- * The attaching of kvm and vfio_group might already happen, so
- * here we replay once upon registration.
- */
- if (set_kvm) {
- down_read(&group->group_rwsem);
- if (group->kvm)
- blocking_notifier_call_chain(&group->notifier,
- VFIO_GROUP_NOTIFY_SET_KVM,
- group->kvm);
- up_read(&group->group_rwsem);
- }
- return 0;
-}
-
int vfio_register_notifier(struct vfio_device *device,
enum vfio_notify_type type, unsigned long *events,
struct notifier_block *nb)
@@ -2090,9 +2072,6 @@ int vfio_register_notifier(struct vfio_device *device,
case VFIO_IOMMU_NOTIFY:
ret = vfio_register_iommu_notifier(group, events, nb);
break;
- case VFIO_GROUP_NOTIFY:
- ret = vfio_register_group_notifier(group, events, nb);
- break;
default:
ret = -EINVAL;
}
@@ -2114,9 +2093,6 @@ int vfio_unregister_notifier(struct vfio_device *device,
case VFIO_IOMMU_NOTIFY:
ret = vfio_unregister_iommu_notifier(group, nb);
break;
- case VFIO_GROUP_NOTIFY:
- ret = blocking_notifier_chain_unregister(&group->notifier, nb);
- break;
default:
ret = -EINVAL;
}
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 45b287826ce6..5f691453e3fb 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -36,6 +36,7 @@ struct vfio_device {
struct vfio_device_set *dev_set;
struct list_head dev_set_list;
unsigned int migration_flags;
+ struct kvm *kvm;

/* Members below here are private, not for driver use */
refcount_t refcount;
@@ -155,15 +156,11 @@ extern int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
/* each type has independent events */
enum vfio_notify_type {
VFIO_IOMMU_NOTIFY = 0,
- VFIO_GROUP_NOTIFY = 1,
};

/* events for VFIO_IOMMU_NOTIFY */
#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)

-/* events for VFIO_GROUP_NOTIFY */
-#define VFIO_GROUP_NOTIFY_SET_KVM BIT(0)
-
extern int vfio_register_notifier(struct vfio_device *device,
enum vfio_notify_type type,
unsigned long *required_events,
--
2.27.0


2022-05-18 01:55:15

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On Tue, May 17, 2022 at 02:08:51PM -0400, Matthew Rosato wrote:
> Rather than relying on a notifier for associating the KVM with
> the group, let's assume that the association has already been
> made prior to device_open. The first time a device is opened
> associate the group KVM with the device.

This also fixes a user triggerable oops in gvt

> Suggested-by: Jason Gunthorpe <[email protected]>

You can add my signed-off-by as well

> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index cfcff7764403..c5d421eda275 100644
> +++ b/drivers/vfio/vfio.c
> @@ -10,6 +10,7 @@
> * Author: Tom Lyon, [email protected]
> */
>
> +#include "linux/kvm_host.h"

This is the wrong format of include (editor automation, sigh)

> @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device)
>
> mutex_lock(&device->dev_set->lock);
> device->open_count++;
> + down_write(&device->group->group_rwsem);

Read I suppose

> + if (device->open_count == 1 && device->group->kvm) {
> + device->kvm = device->group->kvm;
> + kvm_get_kvm(device->kvm);
> + }
> + up_write(&device->group->group_rwsem);

Yeah, this is OK, not very elegant though

I was looking at this - but it could come later too:

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 8e11d9119418be..c5d8dfe8314108 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -10,7 +10,6 @@
* Author: Tom Lyon, [email protected]
*/

-#include "linux/kvm_host.h"
#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
@@ -33,6 +32,7 @@
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
+#include <linux/kvm_host.h>
#include "vfio.h"

#define DRIVER_VERSION "0.3"
@@ -1059,93 +1059,71 @@ static int vfio_device_assign_container(struct vfio_device *device)

static void vfio_device_unassign_container(struct vfio_device *device)
{
- down_write(&device->group->group_rwsem);
+ lockdep_assert_held(&device->group->group_rwsem);
+
WARN_ON(device->group->container_users <= 1);
device->group->container_users--;
fput(device->group->opened_file);
- up_write(&device->group->group_rwsem);
}

-static struct file *vfio_device_open(struct vfio_device *device)
+static int vfio_device_open(struct vfio_device *device)
{
- struct file *filep;
int ret;

+ lockdep_assert_held(&device->dev_set->lock);
+
+ if (!try_module_get(device->dev->driver->owner))
+ return -ENODEV;
+
down_write(&device->group->group_rwsem);
ret = vfio_device_assign_container(device);
- if (ret) {
- up_write(&device->group->group_rwsem);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto err_unlock;

if (device->ops->flags & VFIO_DEVICE_NEEDS_KVM)
{
- if (!device->group->kvm) {
- up_write(&device->group->group_rwsem);
+ if (!device->group->kvm)
goto err_unassign_container;
- }
device->kvm = device->group->kvm;
kvm_get_kvm(device->kvm);
}
up_write(&device->group->group_rwsem);

- if (!try_module_get(device->dev->driver->owner)) {
- ret = -ENODEV;
- goto err_put_kvm;
- }
-
- mutex_lock(&device->dev_set->lock);
- device->open_count++;
- if (device->open_count == 1 && device->ops->open_device) {
+ if (device->ops->open_device) {
ret = device->ops->open_device(device);
if (ret)
- goto err_undo_count;
+ goto err_put_kvm;
}
- mutex_unlock(&device->dev_set->lock);
+ return 0;

- /*
- * We can't use anon_inode_getfd() because we need to modify
- * the f_mode flags directly to allow more than just ioctls
- */
- filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
- device, O_RDWR);
- if (IS_ERR(filep)) {
- ret = PTR_ERR(filep);
- goto err_close_device;
+err_put_kvm:
+ if (device->kvm) {
+ kvm_put_kvm(device->kvm);
+ device->kvm = NULL;
}
+ down_write(&device->group->group_rwsem);
+err_unassign_container:
+ vfio_device_unassign_container(device);
+err_unlock:
+ up_write(&device->group->group_rwsem);
+ module_put(device->dev->driver->owner);
+ return ret;
+}

- /*
- * TODO: add an anon_inode interface to do this.
- * Appears to be missing by lack of need rather than
- * explicitly prevented. Now there's need.
- */
- filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
-
- if (device->group->type == VFIO_NO_IOMMU)
- dev_warn(device->dev, "vfio-noiommu device opened by user "
- "(%s:%d)\n", current->comm, task_pid_nr(current));
- /*
- * On success the ref of device is moved to the file and
- * put in vfio_device_fops_release()
- */
- return filep;
+static void vfio_device_close(struct vfio_device *device)
+{
+ lockdep_assert_held(&device->dev_set->lock);

-err_close_device:
- mutex_lock(&device->dev_set->lock);
- if (device->open_count == 1 && device->ops->close_device)
+ if (device->ops->close_device)
device->ops->close_device(device);
-err_undo_count:
- device->open_count--;
- mutex_unlock(&device->dev_set->lock);
- module_put(device->dev->driver->owner);
-err_put_kvm:
if (device->kvm) {
kvm_put_kvm(device->kvm);
device->kvm = NULL;
}
-err_unassign_container:
+ down_write(&device->group->group_rwsem);
vfio_device_unassign_container(device);
- return ERR_PTR(ret);
+ up_write(&device->group->group_rwsem);
+ module_put(device->dev->driver->owner);
}

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
@@ -1159,23 +1137,61 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
if (IS_ERR(device))
return PTR_ERR(device);

- fdno = get_unused_fd_flags(O_CLOEXEC);
- if (fdno < 0) {
- ret = fdno;
- goto err_put_device;
+ mutex_lock(&device->dev_set->lock);
+ device->open_count++;
+ if (device->open_count == 1) {
+ ret = vfio_device_open(device);
+ if (ret) {
+ device->open_count--;
+ mutex_unlock(&device->dev_set->lock);
+ goto err_put_device;
+ }
}
+ mutex_unlock(&device->dev_set->lock);

- filep = vfio_device_open(device);
+ /*
+ * We can't use anon_inode_getfd() because we need to modify
+ * the f_mode flags directly to allow more than just ioctls
+ */
+ filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, device,
+ O_RDWR);
if (IS_ERR(filep)) {
ret = PTR_ERR(filep);
- goto err_put_fdno;
+ goto err_close_device;
+ }
+
+ /*
+ * TODO: add an anon_inode interface to do this.
+ * Appears to be missing by lack of need rather than
+ * explicitly prevented. Now there's need.
+ */
+ filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
+
+ fdno = get_unused_fd_flags(O_CLOEXEC);
+ if (fdno < 0) {
+ ret = fdno;
+ goto err_put_file;
}

+ if (device->group->type == VFIO_NO_IOMMU)
+ dev_warn(device->dev, "vfio-noiommu device opened by user "
+ "(%s:%d)\n", current->comm, task_pid_nr(current));
+
+ /*
+ * On success the ref of device is moved to the file and put in
+ * vfio_device_fops_release().
+ */
fd_install(fdno, filep);
return fdno;

-err_put_fdno:
- put_unused_fd(fdno);
+err_put_file:
+ fput(filep);
+err_close_device:
+ mutex_lock(&device->dev_set->lock);
+ if (device->open_count == 1)
+ vfio_device_close(device);
+ device->open_count--;
+ mutex_unlock(&device->dev_set->lock);
err_put_device:
vfio_device_put(device);
return ret;
@@ -1333,19 +1349,11 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)

mutex_lock(&device->dev_set->lock);
vfio_assert_device_open(device);
- if (device->open_count == 1 && device->ops->close_device)
- device->ops->close_device(device);
+ if (device->open_count == 1)
+ vfio_device_close(device);
device->open_count--;
mutex_unlock(&device->dev_set->lock);

- module_put(device->dev->driver->owner);
-
- if (device->kvm) {
- kvm_put_kvm(device->kvm);
- device->kvm = NULL;
- }
- vfio_device_unassign_container(device);
-
vfio_device_put(device);

return 0;

2022-05-18 07:40:09

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

> if (device->ops->flags & VFIO_DEVICE_NEEDS_KVM)
> {

Nit: this is not the normal brace placement.

But what is you diff against anyway? The one Matthew sent did away
with the VFIO_DEVICE_NEEDS_KVM flags, which does the wrong thing for
zpci, so it can't be that..

Also if we want to do major code movement, it really needs to go into
a separate patch or patches, as the combinations of all these moves
with actual code changes is almost unreadable.

2022-05-18 07:48:30

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

With this the release_work in gvt can go away as well:

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 633acfcf76bf2..aee1a45da74bc 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -227,7 +227,6 @@ struct intel_vgpu {
struct mutex cache_lock;

struct notifier_block iommu_notifier;
- struct work_struct release_work;
atomic_t released;

struct kvm_page_track_notifier_node track_node;
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index b317ae4cc7d2d..917617d7599a9 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -228,8 +228,6 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
}
}

-static void intel_vgpu_release_work(struct work_struct *work);
-
static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
unsigned long size)
{
@@ -850,8 +848,9 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
}
}

-static void __intel_vgpu_release(struct intel_vgpu *vgpu)
+static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
{
+ struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
int ret;

@@ -880,19 +879,6 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
vgpu->attached = false;
}

-static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
-{
- __intel_vgpu_release(vfio_dev_to_vgpu(vfio_dev));
-}
-
-static void intel_vgpu_release_work(struct work_struct *work)
-{
- struct intel_vgpu *vgpu =
- container_of(work, struct intel_vgpu, release_work);
-
- __intel_vgpu_release(vgpu);
-}
-
static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
{
u32 start_lo, start_hi;
@@ -1639,7 +1625,6 @@ static int intel_vgpu_probe(struct mdev_device *mdev)
return PTR_ERR(vgpu);
}

- INIT_WORK(&vgpu->release_work, intel_vgpu_release_work);
vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev,
&intel_vgpu_dev_ops);


2022-05-18 12:04:36

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On Wed, May 18, 2022 at 12:39:41AM -0700, Christoph Hellwig wrote:
> > if (device->ops->flags & VFIO_DEVICE_NEEDS_KVM)
> > {
>
> Nit: this is not the normal brace placement.
>
> But what is you diff against anyway? The one Matthew sent did away
> with the VFIO_DEVICE_NEEDS_KVM flags, which does the wrong thing for
> zpci, so it can't be that..

Against what I sent before, I did this before Matthew sent his

> Also if we want to do major code movement, it really needs to go into
> a separate patch or patches, as the combinations of all these moves
> with actual code changes is almost unreadable.

Sure, just checking how things could look at this point

Matthew's version first followed by the code motion is probably a good
approach.

Jason

2022-05-18 15:04:22

by Matthew Rosato

[permalink] [raw]
Subject: Re: [PATCH 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On 5/17/22 2:08 PM, Matthew Rosato wrote:
> Rather than relying on a notifier for associating the KVM with
> the group, let's assume that the association has already been
> made prior to device_open. The first time a device is opened
> associate the group KVM with the device.
>
> Suggested-by: Jason Gunthorpe <[email protected]>
> Signed-off-by: Matthew Rosato <[email protected]>

...

> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index cfcff7764403..c5d421eda275 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -10,6 +10,7 @@
> * Author: Tom Lyon, [email protected]
> */
>
> +#include "linux/kvm_host.h"
> #include <linux/cdev.h>
> #include <linux/compat.h>
> #include <linux/device.h>
> @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device)
>
> mutex_lock(&device->dev_set->lock);
> device->open_count++;
> + down_write(&device->group->group_rwsem);
> + if (device->open_count == 1 && device->group->kvm) {
> + device->kvm = device->group->kvm;
> + kvm_get_kvm(device->kvm);

Did some more compile testing, since vfio has no hard kvm dependency,
kvm_get_kvm and kvm_put_kvm are an issue if KVM is a module while vfio
is built-in...

2022-05-18 15:14:41

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On Wed, May 18, 2022 at 10:37:48AM -0400, Matthew Rosato wrote:
> On 5/17/22 2:08 PM, Matthew Rosato wrote:
> > Rather than relying on a notifier for associating the KVM with
> > the group, let's assume that the association has already been
> > made prior to device_open. The first time a device is opened
> > associate the group KVM with the device.
> >
> > Suggested-by: Jason Gunthorpe <[email protected]>
> > Signed-off-by: Matthew Rosato <[email protected]>
>
> ...
>
> > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> > index cfcff7764403..c5d421eda275 100644
> > +++ b/drivers/vfio/vfio.c
> > @@ -10,6 +10,7 @@
> > * Author: Tom Lyon, [email protected]
> > */
> > +#include "linux/kvm_host.h"
> > #include <linux/cdev.h>
> > #include <linux/compat.h>
> > #include <linux/device.h>
> > @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device)
> > mutex_lock(&device->dev_set->lock);
> > device->open_count++;
> > + down_write(&device->group->group_rwsem);
> > + if (device->open_count == 1 && device->group->kvm) {
> > + device->kvm = device->group->kvm;
> > + kvm_get_kvm(device->kvm);
>
> Did some more compile testing, since vfio has no hard kvm dependency,
> kvm_get_kvm and kvm_put_kvm are an issue if KVM is a module while vfio is
> built-in...

Ugh, my other plan was to have the driver itself capture the kvm, ie
we lock the group_rwsem to keep the group->kvm valid and then pass the
kvm to open_device in some way, then the driver can kvm_get_kvm() it

Alternatively, I don't know why kvm_get_kvm() is an exported symbol
when it is just calling refcount_inc() - inlining it would be an
improvement I think.

Jason

2022-05-18 15:36:33

by Matthew Rosato

[permalink] [raw]
Subject: Re: [PATCH 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On 5/18/22 11:12 AM, Jason Gunthorpe wrote:
> On Wed, May 18, 2022 at 10:37:48AM -0400, Matthew Rosato wrote:
>> On 5/17/22 2:08 PM, Matthew Rosato wrote:
>>> Rather than relying on a notifier for associating the KVM with
>>> the group, let's assume that the association has already been
>>> made prior to device_open. The first time a device is opened
>>> associate the group KVM with the device.
>>>
>>> Suggested-by: Jason Gunthorpe <[email protected]>
>>> Signed-off-by: Matthew Rosato <[email protected]>
>>
>> ...
>>
>>> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
>>> index cfcff7764403..c5d421eda275 100644
>>> +++ b/drivers/vfio/vfio.c
>>> @@ -10,6 +10,7 @@
>>> * Author: Tom Lyon, [email protected]
>>> */
>>> +#include "linux/kvm_host.h"
>>> #include <linux/cdev.h>
>>> #include <linux/compat.h>
>>> #include <linux/device.h>
>>> @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device)
>>> mutex_lock(&device->dev_set->lock);
>>> device->open_count++;
>>> + down_write(&device->group->group_rwsem);
>>> + if (device->open_count == 1 && device->group->kvm) {
>>> + device->kvm = device->group->kvm;
>>> + kvm_get_kvm(device->kvm);
>>
>> Did some more compile testing, since vfio has no hard kvm dependency,
>> kvm_get_kvm and kvm_put_kvm are an issue if KVM is a module while vfio is
>> built-in...
>
> Ugh, my other plan was to have the driver itself capture the kvm, ie
> we lock the group_rwsem to keep the group->kvm valid and then pass the
> kvm to open_device in some way, then the driver can kvm_get_kvm() it
>

Hrm... If we did that we would have to re-evaluate some other usage of
the rwsem e.g. if driver open_device calls vfio_register_iommu_notifier
it will try to get the rwsem but it's already locked.

> Alternatively, I don't know why kvm_get_kvm() is an exported symbol
> when it is just calling refcount_inc() - inlining it would be an
> improvement I think.
>

I think that would work for kvm_get_kvm, but kvm_put_kvm (which we also
need) calls kvm_destroy_kvm after the refcount_dec and that can't be inlined

2022-05-18 15:42:29

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On Wed, May 18, 2022 at 11:33:37AM -0400, Matthew Rosato wrote:
> On 5/18/22 11:12 AM, Jason Gunthorpe wrote:
> > On Wed, May 18, 2022 at 10:37:48AM -0400, Matthew Rosato wrote:
> > > On 5/17/22 2:08 PM, Matthew Rosato wrote:
> > > > Rather than relying on a notifier for associating the KVM with
> > > > the group, let's assume that the association has already been
> > > > made prior to device_open. The first time a device is opened
> > > > associate the group KVM with the device.
> > > >
> > > > Suggested-by: Jason Gunthorpe <[email protected]>
> > > > Signed-off-by: Matthew Rosato <[email protected]>
> > >
> > > ...
> > >
> > > > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> > > > index cfcff7764403..c5d421eda275 100644
> > > > +++ b/drivers/vfio/vfio.c
> > > > @@ -10,6 +10,7 @@
> > > > * Author: Tom Lyon, [email protected]
> > > > */
> > > > +#include "linux/kvm_host.h"
> > > > #include <linux/cdev.h>
> > > > #include <linux/compat.h>
> > > > #include <linux/device.h>
> > > > @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device)
> > > > mutex_lock(&device->dev_set->lock);
> > > > device->open_count++;
> > > > + down_write(&device->group->group_rwsem);
> > > > + if (device->open_count == 1 && device->group->kvm) {
> > > > + device->kvm = device->group->kvm;
> > > > + kvm_get_kvm(device->kvm);
> > >
> > > Did some more compile testing, since vfio has no hard kvm dependency,
> > > kvm_get_kvm and kvm_put_kvm are an issue if KVM is a module while vfio is
> > > built-in...
> >
> > Ugh, my other plan was to have the driver itself capture the kvm, ie
> > we lock the group_rwsem to keep the group->kvm valid and then pass the
> > kvm to open_device in some way, then the driver can kvm_get_kvm() it
> >
>
> Hrm... If we did that we would have to re-evaluate some other usage of the
> rwsem e.g. if driver open_device calls vfio_register_iommu_notifier it will
> try to get the rwsem but it's already locked.

Ugh, yes, it means removing the other notifier callback too, which I
was expecting to do as well

Maybe we could split the lock for just this patch though.

> > Alternatively, I don't know why kvm_get_kvm() is an exported symbol
> > when it is just calling refcount_inc() - inlining it would be an
> > improvement I think.
>
> I think that would work for kvm_get_kvm, but kvm_put_kvm (which we also
> need) calls kvm_destroy_kvm after the refcount_dec and that can't be inlined

Indeed.

Jason