2022-05-20 12:43:20

by Matthew Rosato

[permalink] [raw]
Subject: [PATCH v3 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

Rather than relying on a notifier for associating the KVM with
the group, let's assume that the association has already been
made prior to device_open. The first time a device is opened
associate the group KVM with the device.

This fixes a user-triggerable oops in GVT.

Reviewed-by: Tony Krowiak <[email protected]>
Reviewed-by: Kevin Tian <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
Signed-off-by: Matthew Rosato <[email protected]>
---
drivers/gpu/drm/i915/gvt/gtt.c | 4 +-
drivers/gpu/drm/i915/gvt/gvt.h | 3 -
drivers/gpu/drm/i915/gvt/kvmgt.c | 82 ++++++--------------------
drivers/s390/crypto/vfio_ap_ops.c | 35 ++---------
drivers/s390/crypto/vfio_ap_private.h | 3 -
drivers/vfio/vfio.c | 83 ++++++++++-----------------
include/linux/vfio.h | 6 +-
7 files changed, 57 insertions(+), 159 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 9c5cc2800975..b4f69364f9a1 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -51,7 +51,7 @@ static int preallocated_oos_pages = 8192;

static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn)
{
- struct kvm *kvm = vgpu->kvm;
+ struct kvm *kvm = vgpu->vfio_device.kvm;
int idx;
bool ret;

@@ -1185,7 +1185,7 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,

if (!vgpu->attached)
return -EINVAL;
- pfn = gfn_to_pfn(vgpu->kvm, ops->get_pfn(entry));
+ pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
if (is_error_noslot_pfn(pfn))
return -EINVAL;
return PageTransHuge(pfn_to_page(pfn));
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 2af4c83e733c..aee1a45da74b 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -227,9 +227,6 @@ struct intel_vgpu {
struct mutex cache_lock;

struct notifier_block iommu_notifier;
- struct notifier_block group_notifier;
- struct kvm *kvm;
- struct work_struct release_work;
atomic_t released;

struct kvm_page_track_notifier_node track_node;
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 7655ffa97d51..e2f6c56ab342 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -228,8 +228,6 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
}
}

-static void intel_vgpu_release_work(struct work_struct *work);
-
static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
unsigned long size)
{
@@ -761,23 +759,6 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}

-static int intel_vgpu_group_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct intel_vgpu *vgpu =
- container_of(nb, struct intel_vgpu, group_notifier);
-
- /* the only action we care about */
- if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
- vgpu->kvm = data;
-
- if (!data)
- schedule_work(&vgpu->release_work);
- }
-
- return NOTIFY_OK;
-}
-
static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
{
struct intel_vgpu *itr;
@@ -789,7 +770,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
if (!itr->attached)
continue;

- if (vgpu->kvm == itr->kvm) {
+ if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
ret = true;
goto out;
}
@@ -806,7 +787,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
int ret;

vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
- vgpu->group_notifier.notifier_call = intel_vgpu_group_notifier;

events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
@@ -817,38 +797,32 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
goto out;
}

- events = VFIO_GROUP_NOTIFY_SET_KVM;
- ret = vfio_register_notifier(vfio_dev, VFIO_GROUP_NOTIFY, &events,
- &vgpu->group_notifier);
- if (ret != 0) {
- gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
- ret);
- goto undo_iommu;
- }
-
ret = -EEXIST;
if (vgpu->attached)
- goto undo_register;
+ goto undo_iommu;

ret = -ESRCH;
- if (!vgpu->kvm || vgpu->kvm->mm != current->mm) {
+ if (!vgpu->vfio_device.kvm ||
+ vgpu->vfio_device.kvm->mm != current->mm) {
gvt_vgpu_err("KVM is required to use Intel vGPU\n");
- goto undo_register;
+ goto undo_iommu;
}

+ kvm_get_kvm(vgpu->vfio_device.kvm);
+
ret = -EEXIST;
if (__kvmgt_vgpu_exist(vgpu))
- goto undo_register;
+ goto undo_iommu;

vgpu->attached = true;
- kvm_get_kvm(vgpu->kvm);

kvmgt_protect_table_init(vgpu);
gvt_cache_init(vgpu);

vgpu->track_node.track_write = kvmgt_page_track_write;
vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
- kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node);
+ kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
+ &vgpu->track_node);

debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
&vgpu->nr_cache_entries);
@@ -858,10 +832,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
atomic_set(&vgpu->released, 0);
return 0;

-undo_register:
- vfio_unregister_notifier(vfio_dev, VFIO_GROUP_NOTIFY,
- &vgpu->group_notifier);
-
undo_iommu:
vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
&vgpu->iommu_notifier);
@@ -880,8 +850,9 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
}
}

-static void __intel_vgpu_release(struct intel_vgpu *vgpu)
+static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
{
+ struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
int ret;

@@ -898,35 +869,19 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
drm_WARN(&i915->drm, ret,
"vfio_unregister_notifier for iommu failed: %d\n", ret);

- ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_GROUP_NOTIFY,
- &vgpu->group_notifier);
- drm_WARN(&i915->drm, ret,
- "vfio_unregister_notifier for group failed: %d\n", ret);
-
debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));

- kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node);
- kvm_put_kvm(vgpu->kvm);
+ kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
+ &vgpu->track_node);
kvmgt_protect_table_destroy(vgpu);
gvt_cache_destroy(vgpu);

intel_vgpu_release_msi_eventfd_ctx(vgpu);

- vgpu->kvm = NULL;
vgpu->attached = false;
-}
-
-static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
-{
- __intel_vgpu_release(vfio_dev_to_vgpu(vfio_dev));
-}
-
-static void intel_vgpu_release_work(struct work_struct *work)
-{
- struct intel_vgpu *vgpu =
- container_of(work, struct intel_vgpu, release_work);

- __intel_vgpu_release(vgpu);
+ if (vgpu->vfio_device.kvm)
+ kvm_put_kvm(vgpu->vfio_device.kvm);
}

static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
@@ -1675,7 +1630,6 @@ static int intel_vgpu_probe(struct mdev_device *mdev)
return PTR_ERR(vgpu);
}

- INIT_WORK(&vgpu->release_work, intel_vgpu_release_work);
vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev,
&intel_vgpu_dev_ops);

@@ -1713,7 +1667,7 @@ static struct mdev_driver intel_vgpu_mdev_driver = {

int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
{
- struct kvm *kvm = info->kvm;
+ struct kvm *kvm = info->vfio_device.kvm;
struct kvm_memory_slot *slot;
int idx;

@@ -1743,7 +1697,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)

int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
{
- struct kvm *kvm = info->kvm;
+ struct kvm *kvm = info->vfio_device.kvm;
struct kvm_memory_slot *slot;
int idx;

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index e8914024f5b1..a7d2a95796d3 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1284,25 +1284,6 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
}
}

-static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- int notify_rc = NOTIFY_OK;
- struct ap_matrix_mdev *matrix_mdev;
-
- if (action != VFIO_GROUP_NOTIFY_SET_KVM)
- return NOTIFY_OK;
-
- matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
-
- if (!data)
- vfio_ap_mdev_unset_kvm(matrix_mdev);
- else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
- notify_rc = NOTIFY_DONE;
-
- return notify_rc;
-}
-
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
{
struct device *dev;
@@ -1402,11 +1383,10 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
unsigned long events;
int ret;

- matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
- events = VFIO_GROUP_NOTIFY_SET_KVM;
+ if (!vdev->kvm)
+ return -EINVAL;

- ret = vfio_register_notifier(vdev, VFIO_GROUP_NOTIFY, &events,
- &matrix_mdev->group_notifier);
+ ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
if (ret)
return ret;

@@ -1415,12 +1395,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
&matrix_mdev->iommu_notifier);
if (ret)
- goto out_unregister_group;
+ goto err_kvm;
return 0;

-out_unregister_group:
- vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
- &matrix_mdev->group_notifier);
+err_kvm:
+ vfio_ap_mdev_unset_kvm(matrix_mdev);
return ret;
}

@@ -1431,8 +1410,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)

vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
&matrix_mdev->iommu_notifier);
- vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
- &matrix_mdev->group_notifier);
vfio_ap_mdev_unset_kvm(matrix_mdev);
}

diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index 648fcaf8104a..a26efd804d0d 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -81,8 +81,6 @@ struct ap_matrix {
* @node: allows the ap_matrix_mdev struct to be added to a list
* @matrix: the adapters, usage domains and control domains assigned to the
* mediated matrix device.
- * @group_notifier: notifier block used for specifying callback function for
- * handling the VFIO_GROUP_NOTIFY_SET_KVM event
* @iommu_notifier: notifier block used for specifying callback function for
* handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even
* @kvm: the struct holding guest's state
@@ -94,7 +92,6 @@ struct ap_matrix_mdev {
struct vfio_device vdev;
struct list_head node;
struct ap_matrix matrix;
- struct notifier_block group_notifier;
struct notifier_block iommu_notifier;
struct kvm *kvm;
crypto_hook pqap_hook;
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index cfcff7764403..831fc722e3f8 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1083,10 +1083,21 @@ static struct file *vfio_device_open(struct vfio_device *device)

mutex_lock(&device->dev_set->lock);
device->open_count++;
- if (device->open_count == 1 && device->ops->open_device) {
- ret = device->ops->open_device(device);
- if (ret)
- goto err_undo_count;
+ if (device->open_count == 1) {
+ /*
+ * Here we pass the KVM pointer with the group under the read
+ * lock. If the device driver will use it, it must obtain a
+ * reference and release it during close_device.
+ */
+ down_read(&device->group->group_rwsem);
+ device->kvm = device->group->kvm;
+
+ if (device->ops->open_device) {
+ ret = device->ops->open_device(device);
+ if (ret)
+ goto err_undo_count;
+ }
+ up_read(&device->group->group_rwsem);
}
mutex_unlock(&device->dev_set->lock);

@@ -1119,10 +1130,14 @@ static struct file *vfio_device_open(struct vfio_device *device)

err_close_device:
mutex_lock(&device->dev_set->lock);
+ down_read(&device->group->group_rwsem);
if (device->open_count == 1 && device->ops->close_device)
device->ops->close_device(device);
err_undo_count:
device->open_count--;
+ if (device->open_count == 0 && device->kvm)
+ device->kvm = NULL;
+ up_read(&device->group->group_rwsem);
mutex_unlock(&device->dev_set->lock);
module_put(device->dev->driver->owner);
err_unassign_container:
@@ -1315,9 +1330,13 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)

mutex_lock(&device->dev_set->lock);
vfio_assert_device_open(device);
+ down_read(&device->group->group_rwsem);
if (device->open_count == 1 && device->ops->close_device)
device->ops->close_device(device);
+ up_read(&device->group->group_rwsem);
device->open_count--;
+ if (device->open_count == 0)
+ device->kvm = NULL;
mutex_unlock(&device->dev_set->lock);

module_put(device->dev->driver->owner);
@@ -1726,8 +1745,8 @@ EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
* @file: VFIO group file
* @kvm: KVM to link
*
- * The kvm pointer will be forwarded to all the vfio_device's attached to the
- * VFIO file via the VFIO_GROUP_NOTIFY_SET_KVM notifier.
+ * When a VFIO device is first opened the KVM will be available in
+ * device->kvm if one was associated with the group.
*/
void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
@@ -1738,8 +1757,6 @@ void vfio_file_set_kvm(struct file *file, struct kvm *kvm)

down_write(&group->group_rwsem);
group->kvm = kvm;
- blocking_notifier_call_chain(&group->notifier,
- VFIO_GROUP_NOTIFY_SET_KVM, kvm);
up_write(&group->group_rwsem);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
@@ -2006,7 +2023,8 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
struct vfio_iommu_driver *driver;
int ret;

- down_read(&group->group_rwsem);
+ lockdep_assert_held_read(&group->group_rwsem);
+
container = group->container;
driver = container->iommu_driver;
if (likely(driver && driver->ops->register_notifier))
@@ -2014,7 +2032,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
events, nb);
else
ret = -ENOTTY;
- up_read(&group->group_rwsem);

return ret;
}
@@ -2026,7 +2043,8 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
struct vfio_iommu_driver *driver;
int ret;

- down_read(&group->group_rwsem);
+ lockdep_assert_held_read(&group->group_rwsem);
+
container = group->container;
driver = container->iommu_driver;
if (likely(driver && driver->ops->unregister_notifier))
@@ -2034,47 +2052,10 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
nb);
else
ret = -ENOTTY;
- up_read(&group->group_rwsem);

return ret;
}

-static int vfio_register_group_notifier(struct vfio_group *group,
- unsigned long *events,
- struct notifier_block *nb)
-{
- int ret;
- bool set_kvm = false;
-
- if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
- set_kvm = true;
-
- /* clear known events */
- *events &= ~VFIO_GROUP_NOTIFY_SET_KVM;
-
- /* refuse to continue if still events remaining */
- if (*events)
- return -EINVAL;
-
- ret = blocking_notifier_chain_register(&group->notifier, nb);
- if (ret)
- return ret;
-
- /*
- * The attaching of kvm and vfio_group might already happen, so
- * here we replay once upon registration.
- */
- if (set_kvm) {
- down_read(&group->group_rwsem);
- if (group->kvm)
- blocking_notifier_call_chain(&group->notifier,
- VFIO_GROUP_NOTIFY_SET_KVM,
- group->kvm);
- up_read(&group->group_rwsem);
- }
- return 0;
-}
-
int vfio_register_notifier(struct vfio_device *device,
enum vfio_notify_type type, unsigned long *events,
struct notifier_block *nb)
@@ -2090,9 +2071,6 @@ int vfio_register_notifier(struct vfio_device *device,
case VFIO_IOMMU_NOTIFY:
ret = vfio_register_iommu_notifier(group, events, nb);
break;
- case VFIO_GROUP_NOTIFY:
- ret = vfio_register_group_notifier(group, events, nb);
- break;
default:
ret = -EINVAL;
}
@@ -2114,9 +2092,6 @@ int vfio_unregister_notifier(struct vfio_device *device,
case VFIO_IOMMU_NOTIFY:
ret = vfio_unregister_iommu_notifier(group, nb);
break;
- case VFIO_GROUP_NOTIFY:
- ret = blocking_notifier_chain_unregister(&group->notifier, nb);
- break;
default:
ret = -EINVAL;
}
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 45b287826ce6..aa888cc51757 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -36,6 +36,8 @@ struct vfio_device {
struct vfio_device_set *dev_set;
struct list_head dev_set_list;
unsigned int migration_flags;
+ /* Driver must reference the kvm during open_device or never touch it */
+ struct kvm *kvm;

/* Members below here are private, not for driver use */
refcount_t refcount;
@@ -155,15 +157,11 @@ extern int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
/* each type has independent events */
enum vfio_notify_type {
VFIO_IOMMU_NOTIFY = 0,
- VFIO_GROUP_NOTIFY = 1,
};

/* events for VFIO_IOMMU_NOTIFY */
#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)

-/* events for VFIO_GROUP_NOTIFY */
-#define VFIO_GROUP_NOTIFY_SET_KVM BIT(0)
-
extern int vfio_register_notifier(struct vfio_device *device,
enum vfio_notify_type type,
unsigned long *required_events,
--
2.27.0



2022-05-21 03:20:06

by Matthew Rosato

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On 5/20/22 9:56 AM, Tony Krowiak wrote:
>
>
> On 5/19/22 2:33 PM, Matthew Rosato wrote:
>> Rather than relying on a notifier for associating the KVM with
>> the group, let's assume that the association has already been
>> made prior to device_open.  The first time a device is opened
>> associate the group KVM with the device.
>>
>> This fixes a user-triggerable oops in GVT.
>>
>> Reviewed-by: Tony Krowiak <[email protected]>
>> Reviewed-by: Kevin Tian <[email protected]>
>> Reviewed-by: Christoph Hellwig <[email protected]>
>> Signed-off-by: Jason Gunthorpe <[email protected]>
>> Signed-off-by: Matthew Rosato <[email protected]>
>> ---
>>   drivers/gpu/drm/i915/gvt/gtt.c        |  4 +-
>>   drivers/gpu/drm/i915/gvt/gvt.h        |  3 -
>>   drivers/gpu/drm/i915/gvt/kvmgt.c      | 82 ++++++--------------------
>>   drivers/s390/crypto/vfio_ap_ops.c     | 35 ++---------
>>   drivers/s390/crypto/vfio_ap_private.h |  3 -
>>   drivers/vfio/vfio.c                   | 83 ++++++++++-----------------
>>   include/linux/vfio.h                  |  6 +-
>>   7 files changed, 57 insertions(+), 159 deletions(-)
>>
>>
>> diff --git a/drivers/s390/crypto/vfio_ap_ops.c
>> b/drivers/s390/crypto/vfio_ap_ops.c
>> index e8914024f5b1..a7d2a95796d3 100644
>> --- a/drivers/s390/crypto/vfio_ap_ops.c
>> +++ b/drivers/s390/crypto/vfio_ap_ops.c
>> @@ -1284,25 +1284,6 @@ static void vfio_ap_mdev_unset_kvm(struct
>> ap_matrix_mdev *matrix_mdev)
>>       }
>>   }
>> -static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>> -                       unsigned long action, void *data)
>> -{
>> -    int notify_rc = NOTIFY_OK;
>> -    struct ap_matrix_mdev *matrix_mdev;
>> -
>> -    if (action != VFIO_GROUP_NOTIFY_SET_KVM)
>> -        return NOTIFY_OK;
>> -
>> -    matrix_mdev = container_of(nb, struct ap_matrix_mdev,
>> group_notifier);
>> -
>> -    if (!data)
>> -        vfio_ap_mdev_unset_kvm(matrix_mdev);
>> -    else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
>> -        notify_rc = NOTIFY_DONE;
>> -
>> -    return notify_rc;
>> -}
>> -
>>   static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
>>   {
>>       struct device *dev;
>> @@ -1402,11 +1383,10 @@ static int vfio_ap_mdev_open_device(struct
>> vfio_device *vdev)
>>       unsigned long events;
>>       int ret;
>> -    matrix_mdev->group_notifier.notifier_call =
>> vfio_ap_mdev_group_notifier;
>> -    events = VFIO_GROUP_NOTIFY_SET_KVM;
>> +    if (!vdev->kvm)
>> +        return -EINVAL;
>> -    ret = vfio_register_notifier(vdev, VFIO_GROUP_NOTIFY, &events,
>> -                     &matrix_mdev->group_notifier);
>> +    ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
>>       if (ret)
>>           return ret;
>
> I'm sorry I didn't see this with my last review, but maybe move the call
> to vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm) after the successful
> registration of the IOMMU notifier? This way you won't be plugging AP
> queues
> into the guest only to remove them if the registration fails.

This is a pretty edge error case, and the
vfio_ap_mdev_unset_kvm(matrix_mdev) call at err_kvm should do the proper
cleanup, right? I guess I'm wondering if it's really any different than
the prior code which would have registered the VFIO_GROUP_NOTIFY_SET_KVM
first, which would have immediately triggered the notifier since the KVM
was already registered to the group, meaning it would have called
vfio_ap_mdev_group_notifier->vfio_ap_mdev_set_kvm anyway (see
vfio_register_group_notifier, the "The attaching of kvm and vfio_group
might already happen..." comment).

>
>> @@ -1415,12 +1395,11 @@ static int vfio_ap_mdev_open_device(struct
>> vfio_device *vdev)
>>       ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
>>                        &matrix_mdev->iommu_notifier);
>>       if (ret)
>> -        goto out_unregister_group;
>> +        goto err_kvm;
>>       return 0;
>> -out_unregister_group:
>> -    vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
>> -                 &matrix_mdev->group_notifier);
>> +err_kvm:
>> +    vfio_ap_mdev_unset_kvm(matrix_mdev);
>>       return ret;
>>   }


2022-05-21 11:13:56

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

Looks good:

Reviewed-by: Christoph Hellwig <[email protected]>

2022-05-21 17:46:28

by Anthony Krowiak

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM



On 5/19/22 2:33 PM, Matthew Rosato wrote:
> Rather than relying on a notifier for associating the KVM with
> the group, let's assume that the association has already been
> made prior to device_open. The first time a device is opened
> associate the group KVM with the device.
>
> This fixes a user-triggerable oops in GVT.
>
> Reviewed-by: Tony Krowiak <[email protected]>
> Reviewed-by: Kevin Tian <[email protected]>
> Reviewed-by: Christoph Hellwig <[email protected]>
> Signed-off-by: Jason Gunthorpe <[email protected]>
> Signed-off-by: Matthew Rosato <[email protected]>
> ---
> drivers/gpu/drm/i915/gvt/gtt.c | 4 +-
> drivers/gpu/drm/i915/gvt/gvt.h | 3 -
> drivers/gpu/drm/i915/gvt/kvmgt.c | 82 ++++++--------------------
> drivers/s390/crypto/vfio_ap_ops.c | 35 ++---------
> drivers/s390/crypto/vfio_ap_private.h | 3 -
> drivers/vfio/vfio.c | 83 ++++++++++-----------------
> include/linux/vfio.h | 6 +-
> 7 files changed, 57 insertions(+), 159 deletions(-)
>
>
>
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index e8914024f5b1..a7d2a95796d3 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -1284,25 +1284,6 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
> }
> }
>
> -static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
> - unsigned long action, void *data)
> -{
> - int notify_rc = NOTIFY_OK;
> - struct ap_matrix_mdev *matrix_mdev;
> -
> - if (action != VFIO_GROUP_NOTIFY_SET_KVM)
> - return NOTIFY_OK;
> -
> - matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
> -
> - if (!data)
> - vfio_ap_mdev_unset_kvm(matrix_mdev);
> - else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
> - notify_rc = NOTIFY_DONE;
> -
> - return notify_rc;
> -}
> -
> static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
> {
> struct device *dev;
> @@ -1402,11 +1383,10 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
> unsigned long events;
> int ret;
>
> - matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
> - events = VFIO_GROUP_NOTIFY_SET_KVM;
> + if (!vdev->kvm)
> + return -EINVAL;
>
> - ret = vfio_register_notifier(vdev, VFIO_GROUP_NOTIFY, &events,
> - &matrix_mdev->group_notifier);
> + ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
> if (ret)
> return ret;

I'm sorry I didn't see this with my last review, but maybe move the call
to vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm) after the successful
registration of the IOMMU notifier? This way you won't be plugging AP queues
into the guest only to remove them if the registration fails.

>
> @@ -1415,12 +1395,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
> ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
> &matrix_mdev->iommu_notifier);
> if (ret)
> - goto out_unregister_group;
> + goto err_kvm;
> return 0;
>
> -out_unregister_group:
> - vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
> - &matrix_mdev->group_notifier);
> +err_kvm:
> + vfio_ap_mdev_unset_kvm(matrix_mdev);
> return ret;
> }
>
> @@ -1431,8 +1410,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
>
> vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
> &matrix_mdev->iommu_notifier);
> - vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
> - &matrix_mdev->group_notifier);
> vfio_ap_mdev_unset_kvm(matrix_mdev);
> }
>
> diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
> index 648fcaf8104a..a26efd804d0d 100644
> --- a/drivers/s390/crypto/vfio_ap_private.h
> +++ b/drivers/s390/crypto/vfio_ap_private.h
> @@ -81,8 +81,6 @@ struct ap_matrix {
> * @node: allows the ap_matrix_mdev struct to be added to a list
> * @matrix: the adapters, usage domains and control domains assigned to the
> * mediated matrix device.
> - * @group_notifier: notifier block used for specifying callback function for
> - * handling the VFIO_GROUP_NOTIFY_SET_KVM event
> * @iommu_notifier: notifier block used for specifying callback function for
> * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even
> * @kvm: the struct holding guest's state
> @@ -94,7 +92,6 @@ struct ap_matrix_mdev {
> struct vfio_device vdev;
> struct list_head node;
> struct ap_matrix matrix;
> - struct notifier_block group_notifier;
> struct notifier_block iommu_notifier;
> struct kvm *kvm;
> crypto_hook pqap_hook;
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index cfcff7764403..831fc722e3f8 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1083,10 +1083,21 @@ static struct file *vfio_device_open(struct vfio_device *device)
>
> mutex_lock(&device->dev_set->lock);
> device->open_count++;
> - if (device->open_count == 1 && device->ops->open_device) {
> - ret = device->ops->open_device(device);
> - if (ret)
> - goto err_undo_count;
> + if (device->open_count == 1) {
> + /*
> + * Here we pass the KVM pointer with the group under the read
> + * lock. If the device driver will use it, it must obtain a
> + * reference and release it during close_device.
> + */
> + down_read(&device->group->group_rwsem);
> + device->kvm = device->group->kvm;
> +
> + if (device->ops->open_device) {
> + ret = device->ops->open_device(device);
> + if (ret)
> + goto err_undo_count;
> + }
> + up_read(&device->group->group_rwsem);
> }
> mutex_unlock(&device->dev_set->lock);
>
> @@ -1119,10 +1130,14 @@ static struct file *vfio_device_open(struct vfio_device *device)
>
> err_close_device:
> mutex_lock(&device->dev_set->lock);
> + down_read(&device->group->group_rwsem);
> if (device->open_count == 1 && device->ops->close_device)
> device->ops->close_device(device);
> err_undo_count:
> device->open_count--;
> + if (device->open_count == 0 && device->kvm)
> + device->kvm = NULL;
> + up_read(&device->group->group_rwsem);
> mutex_unlock(&device->dev_set->lock);
> module_put(device->dev->driver->owner);
> err_unassign_container:
> @@ -1315,9 +1330,13 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
>
> mutex_lock(&device->dev_set->lock);
> vfio_assert_device_open(device);
> + down_read(&device->group->group_rwsem);
> if (device->open_count == 1 && device->ops->close_device)
> device->ops->close_device(device);
> + up_read(&device->group->group_rwsem);
> device->open_count--;
> + if (device->open_count == 0)
> + device->kvm = NULL;
> mutex_unlock(&device->dev_set->lock);
>
> module_put(device->dev->driver->owner);
> @@ -1726,8 +1745,8 @@ EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
> * @file: VFIO group file
> * @kvm: KVM to link
> *
> - * The kvm pointer will be forwarded to all the vfio_device's attached to the
> - * VFIO file via the VFIO_GROUP_NOTIFY_SET_KVM notifier.
> + * When a VFIO device is first opened the KVM will be available in
> + * device->kvm if one was associated with the group.
> */
> void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
> {
> @@ -1738,8 +1757,6 @@ void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
>
> down_write(&group->group_rwsem);
> group->kvm = kvm;
> - blocking_notifier_call_chain(&group->notifier,
> - VFIO_GROUP_NOTIFY_SET_KVM, kvm);
> up_write(&group->group_rwsem);
> }
> EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
> @@ -2006,7 +2023,8 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
> struct vfio_iommu_driver *driver;
> int ret;
>
> - down_read(&group->group_rwsem);
> + lockdep_assert_held_read(&group->group_rwsem);
> +
> container = group->container;
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->register_notifier))
> @@ -2014,7 +2032,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
> events, nb);
> else
> ret = -ENOTTY;
> - up_read(&group->group_rwsem);
>
> return ret;
> }
> @@ -2026,7 +2043,8 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
> struct vfio_iommu_driver *driver;
> int ret;
>
> - down_read(&group->group_rwsem);
> + lockdep_assert_held_read(&group->group_rwsem);
> +
> container = group->container;
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->unregister_notifier))
> @@ -2034,47 +2052,10 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
> nb);
> else
> ret = -ENOTTY;
> - up_read(&group->group_rwsem);
>
> return ret;
> }
>
> -static int vfio_register_group_notifier(struct vfio_group *group,
> - unsigned long *events,
> - struct notifier_block *nb)
> -{
> - int ret;
> - bool set_kvm = false;
> -
> - if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
> - set_kvm = true;
> -
> - /* clear known events */
> - *events &= ~VFIO_GROUP_NOTIFY_SET_KVM;
> -
> - /* refuse to continue if still events remaining */
> - if (*events)
> - return -EINVAL;
> -
> - ret = blocking_notifier_chain_register(&group->notifier, nb);
> - if (ret)
> - return ret;
> -
> - /*
> - * The attaching of kvm and vfio_group might already happen, so
> - * here we replay once upon registration.
> - */
> - if (set_kvm) {
> - down_read(&group->group_rwsem);
> - if (group->kvm)
> - blocking_notifier_call_chain(&group->notifier,
> - VFIO_GROUP_NOTIFY_SET_KVM,
> - group->kvm);
> - up_read(&group->group_rwsem);
> - }
> - return 0;
> -}
> -
> int vfio_register_notifier(struct vfio_device *device,
> enum vfio_notify_type type, unsigned long *events,
> struct notifier_block *nb)
> @@ -2090,9 +2071,6 @@ int vfio_register_notifier(struct vfio_device *device,
> case VFIO_IOMMU_NOTIFY:
> ret = vfio_register_iommu_notifier(group, events, nb);
> break;
> - case VFIO_GROUP_NOTIFY:
> - ret = vfio_register_group_notifier(group, events, nb);
> - break;
> default:
> ret = -EINVAL;
> }
> @@ -2114,9 +2092,6 @@ int vfio_unregister_notifier(struct vfio_device *device,
> case VFIO_IOMMU_NOTIFY:
> ret = vfio_unregister_iommu_notifier(group, nb);
> break;
> - case VFIO_GROUP_NOTIFY:
> - ret = blocking_notifier_chain_unregister(&group->notifier, nb);
> - break;
> default:
> ret = -EINVAL;
> }
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 45b287826ce6..aa888cc51757 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -36,6 +36,8 @@ struct vfio_device {
> struct vfio_device_set *dev_set;
> struct list_head dev_set_list;
> unsigned int migration_flags;
> + /* Driver must reference the kvm during open_device or never touch it */
> + struct kvm *kvm;
>
> /* Members below here are private, not for driver use */
> refcount_t refcount;
> @@ -155,15 +157,11 @@ extern int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
> /* each type has independent events */
> enum vfio_notify_type {
> VFIO_IOMMU_NOTIFY = 0,
> - VFIO_GROUP_NOTIFY = 1,
> };
>
> /* events for VFIO_IOMMU_NOTIFY */
> #define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
>
> -/* events for VFIO_GROUP_NOTIFY */
> -#define VFIO_GROUP_NOTIFY_SET_KVM BIT(0)
> -
> extern int vfio_register_notifier(struct vfio_device *device,
> enum vfio_notify_type type,
> unsigned long *required_events,


2022-05-23 05:48:22

by Anthony Krowiak

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM



On 5/20/22 10:09 AM, Matthew Rosato wrote:
> On 5/20/22 9:56 AM, Tony Krowiak wrote:
>>
>>
>> On 5/19/22 2:33 PM, Matthew Rosato wrote:
>>> Rather than relying on a notifier for associating the KVM with
>>> the group, let's assume that the association has already been
>>> made prior to device_open.  The first time a device is opened
>>> associate the group KVM with the device.
>>>
>>> This fixes a user-triggerable oops in GVT.
>>>
>>> Reviewed-by: Tony Krowiak <[email protected]>
>>> Reviewed-by: Kevin Tian <[email protected]>
>>> Reviewed-by: Christoph Hellwig <[email protected]>
>>> Signed-off-by: Jason Gunthorpe <[email protected]>
>>> Signed-off-by: Matthew Rosato <[email protected]>
>>> ---
>>>   drivers/gpu/drm/i915/gvt/gtt.c        |  4 +-
>>>   drivers/gpu/drm/i915/gvt/gvt.h        |  3 -
>>>   drivers/gpu/drm/i915/gvt/kvmgt.c      | 82 ++++++--------------------
>>>   drivers/s390/crypto/vfio_ap_ops.c     | 35 ++---------
>>>   drivers/s390/crypto/vfio_ap_private.h |  3 -
>>>   drivers/vfio/vfio.c                   | 83
>>> ++++++++++-----------------
>>>   include/linux/vfio.h                  |  6 +-
>>>   7 files changed, 57 insertions(+), 159 deletions(-)
>>>
>>>
>>> diff --git a/drivers/s390/crypto/vfio_ap_ops.c
>>> b/drivers/s390/crypto/vfio_ap_ops.c
>>> index e8914024f5b1..a7d2a95796d3 100644
>>> --- a/drivers/s390/crypto/vfio_ap_ops.c
>>> +++ b/drivers/s390/crypto/vfio_ap_ops.c
>>> @@ -1284,25 +1284,6 @@ static void vfio_ap_mdev_unset_kvm(struct
>>> ap_matrix_mdev *matrix_mdev)
>>>       }
>>>   }
>>> -static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>>> -                       unsigned long action, void *data)
>>> -{
>>> -    int notify_rc = NOTIFY_OK;
>>> -    struct ap_matrix_mdev *matrix_mdev;
>>> -
>>> -    if (action != VFIO_GROUP_NOTIFY_SET_KVM)
>>> -        return NOTIFY_OK;
>>> -
>>> -    matrix_mdev = container_of(nb, struct ap_matrix_mdev,
>>> group_notifier);
>>> -
>>> -    if (!data)
>>> -        vfio_ap_mdev_unset_kvm(matrix_mdev);
>>> -    else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
>>> -        notify_rc = NOTIFY_DONE;
>>> -
>>> -    return notify_rc;
>>> -}
>>> -
>>>   static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
>>>   {
>>>       struct device *dev;
>>> @@ -1402,11 +1383,10 @@ static int vfio_ap_mdev_open_device(struct
>>> vfio_device *vdev)
>>>       unsigned long events;
>>>       int ret;
>>> -    matrix_mdev->group_notifier.notifier_call =
>>> vfio_ap_mdev_group_notifier;
>>> -    events = VFIO_GROUP_NOTIFY_SET_KVM;
>>> +    if (!vdev->kvm)
>>> +        return -EINVAL;
>>> -    ret = vfio_register_notifier(vdev, VFIO_GROUP_NOTIFY, &events,
>>> -                     &matrix_mdev->group_notifier);
>>> +    ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
>>>       if (ret)
>>>           return ret;
>>
>> I'm sorry I didn't see this with my last review, but maybe move the call
>> to vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm) after the successful
>> registration of the IOMMU notifier? This way you won't be plugging AP
>> queues
>> into the guest only to remove them if the registration fails.
>
> This is a pretty edge error case, and the
> vfio_ap_mdev_unset_kvm(matrix_mdev) call at err_kvm should do the
> proper cleanup, right?  I guess I'm wondering if it's really any
> different than the prior code which would have registered the
> VFIO_GROUP_NOTIFY_SET_KVM first, which would have immediately
> triggered the notifier since the KVM was already registered to the
> group, meaning it would have called
> vfio_ap_mdev_group_notifier->vfio_ap_mdev_set_kvm anyway (see
> vfio_register_group_notifier, the "The attaching of kvm and vfio_group
> might already happen..." comment)

You are correct, the VFIO_GROUP_NOTIFY_SET_KVM notifier will get
triggered when it is registered; however, you may have pointed out a
flaw in the previous version of the code. I'm guessing this notifier is
not triggered when it is unregistered, so unless the guest is terminated
due to a non-zero return code from the open_device callback, the guest
will still have access to the AP queues. In hindsight, we probably should have
registered the IOMMU notifier first.

You make a valid point about this being an edge case and I don't think
it's critical, so feel free to keep it as-is.

My r-b still stands.

>
>>
>>> @@ -1415,12 +1395,11 @@ static int vfio_ap_mdev_open_device(struct
>>> vfio_device *vdev)
>>>       ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
>>>                        &matrix_mdev->iommu_notifier);
>>>       if (ret)
>>> -        goto out_unregister_group;
>>> +        goto err_kvm;
>>>       return 0;
>>> -out_unregister_group:
>>> -    vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
>>> -                 &matrix_mdev->group_notifier);
>>> +err_kvm:
>>> +    vfio_ap_mdev_unset_kvm(matrix_mdev);
>>>       return ret;
>>>   }
>


2022-05-23 07:51:40

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On Thu, May 19, 2022 at 02:33:11PM -0400, Matthew Rosato wrote:
> Rather than relying on a notifier for associating the KVM with
> the group, let's assume that the association has already been
> made prior to device_open. The first time a device is opened
> associate the group KVM with the device.
>
> This fixes a user-triggerable oops in GVT.
>
> Reviewed-by: Tony Krowiak <[email protected]>
> Reviewed-by: Kevin Tian <[email protected]>
> Reviewed-by: Christoph Hellwig <[email protected]>
> Signed-off-by: Jason Gunthorpe <[email protected]>
> Signed-off-by: Matthew Rosato <[email protected]>
> ---
> drivers/gpu/drm/i915/gvt/gtt.c | 4 +-
> drivers/gpu/drm/i915/gvt/gvt.h | 3 -
> drivers/gpu/drm/i915/gvt/kvmgt.c | 82 ++++++--------------------
> drivers/s390/crypto/vfio_ap_ops.c | 35 ++---------
> drivers/s390/crypto/vfio_ap_private.h | 3 -
> drivers/vfio/vfio.c | 83 ++++++++++-----------------
> include/linux/vfio.h | 6 +-
> 7 files changed, 57 insertions(+), 159 deletions(-)

Reviewed-by: Jason Gunthorpe <[email protected]>

Jason

2022-05-23 16:43:40

by Alex Williamson

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM


Hi Zhi & Zhenyu,

Please review the gvt changes below; I'd prefer to get your ack included.
Thanks!

Alex

On Thu, 19 May 2022 14:33:11 -0400
Matthew Rosato <[email protected]> wrote:

> Rather than relying on a notifier for associating the KVM with
> the group, let's assume that the association has already been
> made prior to device_open. The first time a device is opened
> associate the group KVM with the device.
>
> This fixes a user-triggerable oops in GVT.
>
> Reviewed-by: Tony Krowiak <[email protected]>
> Reviewed-by: Kevin Tian <[email protected]>
> Reviewed-by: Christoph Hellwig <[email protected]>
> Signed-off-by: Jason Gunthorpe <[email protected]>
> Signed-off-by: Matthew Rosato <[email protected]>
> ---
> drivers/gpu/drm/i915/gvt/gtt.c | 4 +-
> drivers/gpu/drm/i915/gvt/gvt.h | 3 -
> drivers/gpu/drm/i915/gvt/kvmgt.c | 82 ++++++--------------------
> drivers/s390/crypto/vfio_ap_ops.c | 35 ++---------
> drivers/s390/crypto/vfio_ap_private.h | 3 -
> drivers/vfio/vfio.c | 83 ++++++++++-----------------
> include/linux/vfio.h | 6 +-
> 7 files changed, 57 insertions(+), 159 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
> index 9c5cc2800975..b4f69364f9a1 100644
> --- a/drivers/gpu/drm/i915/gvt/gtt.c
> +++ b/drivers/gpu/drm/i915/gvt/gtt.c
> @@ -51,7 +51,7 @@ static int preallocated_oos_pages = 8192;
>
> static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn)
> {
> - struct kvm *kvm = vgpu->kvm;
> + struct kvm *kvm = vgpu->vfio_device.kvm;
> int idx;
> bool ret;
>
> @@ -1185,7 +1185,7 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
>
> if (!vgpu->attached)
> return -EINVAL;
> - pfn = gfn_to_pfn(vgpu->kvm, ops->get_pfn(entry));
> + pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
> if (is_error_noslot_pfn(pfn))
> return -EINVAL;
> return PageTransHuge(pfn_to_page(pfn));
> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
> index 2af4c83e733c..aee1a45da74b 100644
> --- a/drivers/gpu/drm/i915/gvt/gvt.h
> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
> @@ -227,9 +227,6 @@ struct intel_vgpu {
> struct mutex cache_lock;
>
> struct notifier_block iommu_notifier;
> - struct notifier_block group_notifier;
> - struct kvm *kvm;
> - struct work_struct release_work;
> atomic_t released;
>
> struct kvm_page_track_notifier_node track_node;
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index 7655ffa97d51..e2f6c56ab342 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -228,8 +228,6 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
> }
> }
>
> -static void intel_vgpu_release_work(struct work_struct *work);
> -
> static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
> unsigned long size)
> {
> @@ -761,23 +759,6 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
> return NOTIFY_OK;
> }
>
> -static int intel_vgpu_group_notifier(struct notifier_block *nb,
> - unsigned long action, void *data)
> -{
> - struct intel_vgpu *vgpu =
> - container_of(nb, struct intel_vgpu, group_notifier);
> -
> - /* the only action we care about */
> - if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
> - vgpu->kvm = data;
> -
> - if (!data)
> - schedule_work(&vgpu->release_work);
> - }
> -
> - return NOTIFY_OK;
> -}
> -
> static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
> {
> struct intel_vgpu *itr;
> @@ -789,7 +770,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
> if (!itr->attached)
> continue;
>
> - if (vgpu->kvm == itr->kvm) {
> + if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
> ret = true;
> goto out;
> }
> @@ -806,7 +787,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
> int ret;
>
> vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
> - vgpu->group_notifier.notifier_call = intel_vgpu_group_notifier;
>
> events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
> @@ -817,38 +797,32 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
> goto out;
> }
>
> - events = VFIO_GROUP_NOTIFY_SET_KVM;
> - ret = vfio_register_notifier(vfio_dev, VFIO_GROUP_NOTIFY, &events,
> - &vgpu->group_notifier);
> - if (ret != 0) {
> - gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
> - ret);
> - goto undo_iommu;
> - }
> -
> ret = -EEXIST;
> if (vgpu->attached)
> - goto undo_register;
> + goto undo_iommu;
>
> ret = -ESRCH;
> - if (!vgpu->kvm || vgpu->kvm->mm != current->mm) {
> + if (!vgpu->vfio_device.kvm ||
> + vgpu->vfio_device.kvm->mm != current->mm) {
> gvt_vgpu_err("KVM is required to use Intel vGPU\n");
> - goto undo_register;
> + goto undo_iommu;
> }
>
> + kvm_get_kvm(vgpu->vfio_device.kvm);
> +
> ret = -EEXIST;
> if (__kvmgt_vgpu_exist(vgpu))
> - goto undo_register;
> + goto undo_iommu;
>
> vgpu->attached = true;
> - kvm_get_kvm(vgpu->kvm);
>
> kvmgt_protect_table_init(vgpu);
> gvt_cache_init(vgpu);
>
> vgpu->track_node.track_write = kvmgt_page_track_write;
> vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
> - kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node);
> + kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
> + &vgpu->track_node);
>
> debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
> &vgpu->nr_cache_entries);
> @@ -858,10 +832,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
> atomic_set(&vgpu->released, 0);
> return 0;
>
> -undo_register:
> - vfio_unregister_notifier(vfio_dev, VFIO_GROUP_NOTIFY,
> - &vgpu->group_notifier);
> -
> undo_iommu:
> vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
> &vgpu->iommu_notifier);
> @@ -880,8 +850,9 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
> }
> }
>
> -static void __intel_vgpu_release(struct intel_vgpu *vgpu)
> +static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
> {
> + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
> struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
> int ret;
>
> @@ -898,35 +869,19 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
> drm_WARN(&i915->drm, ret,
> "vfio_unregister_notifier for iommu failed: %d\n", ret);
>
> - ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_GROUP_NOTIFY,
> - &vgpu->group_notifier);
> - drm_WARN(&i915->drm, ret,
> - "vfio_unregister_notifier for group failed: %d\n", ret);
> -
> debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
>
> - kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node);
> - kvm_put_kvm(vgpu->kvm);
> + kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
> + &vgpu->track_node);
> kvmgt_protect_table_destroy(vgpu);
> gvt_cache_destroy(vgpu);
>
> intel_vgpu_release_msi_eventfd_ctx(vgpu);
>
> - vgpu->kvm = NULL;
> vgpu->attached = false;
> -}
> -
> -static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
> -{
> - __intel_vgpu_release(vfio_dev_to_vgpu(vfio_dev));
> -}
> -
> -static void intel_vgpu_release_work(struct work_struct *work)
> -{
> - struct intel_vgpu *vgpu =
> - container_of(work, struct intel_vgpu, release_work);
>
> - __intel_vgpu_release(vgpu);
> + if (vgpu->vfio_device.kvm)
> + kvm_put_kvm(vgpu->vfio_device.kvm);
> }
>
> static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
> @@ -1675,7 +1630,6 @@ static int intel_vgpu_probe(struct mdev_device *mdev)
> return PTR_ERR(vgpu);
> }
>
> - INIT_WORK(&vgpu->release_work, intel_vgpu_release_work);
> vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev,
> &intel_vgpu_dev_ops);
>
> @@ -1713,7 +1667,7 @@ static struct mdev_driver intel_vgpu_mdev_driver = {
>
> int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
> {
> - struct kvm *kvm = info->kvm;
> + struct kvm *kvm = info->vfio_device.kvm;
> struct kvm_memory_slot *slot;
> int idx;
>
> @@ -1743,7 +1697,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
>
> int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
> {
> - struct kvm *kvm = info->kvm;
> + struct kvm *kvm = info->vfio_device.kvm;
> struct kvm_memory_slot *slot;
> int idx;
>
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index e8914024f5b1..a7d2a95796d3 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -1284,25 +1284,6 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
> }
> }
>
> -static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
> - unsigned long action, void *data)
> -{
> - int notify_rc = NOTIFY_OK;
> - struct ap_matrix_mdev *matrix_mdev;
> -
> - if (action != VFIO_GROUP_NOTIFY_SET_KVM)
> - return NOTIFY_OK;
> -
> - matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
> -
> - if (!data)
> - vfio_ap_mdev_unset_kvm(matrix_mdev);
> - else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
> - notify_rc = NOTIFY_DONE;
> -
> - return notify_rc;
> -}
> -
> static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
> {
> struct device *dev;
> @@ -1402,11 +1383,10 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
> unsigned long events;
> int ret;
>
> - matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
> - events = VFIO_GROUP_NOTIFY_SET_KVM;
> + if (!vdev->kvm)
> + return -EINVAL;
>
> - ret = vfio_register_notifier(vdev, VFIO_GROUP_NOTIFY, &events,
> - &matrix_mdev->group_notifier);
> + ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
> if (ret)
> return ret;
>
> @@ -1415,12 +1395,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
> ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
> &matrix_mdev->iommu_notifier);
> if (ret)
> - goto out_unregister_group;
> + goto err_kvm;
> return 0;
>
> -out_unregister_group:
> - vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
> - &matrix_mdev->group_notifier);
> +err_kvm:
> + vfio_ap_mdev_unset_kvm(matrix_mdev);
> return ret;
> }
>
> @@ -1431,8 +1410,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
>
> vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
> &matrix_mdev->iommu_notifier);
> - vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
> - &matrix_mdev->group_notifier);
> vfio_ap_mdev_unset_kvm(matrix_mdev);
> }
>
> diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
> index 648fcaf8104a..a26efd804d0d 100644
> --- a/drivers/s390/crypto/vfio_ap_private.h
> +++ b/drivers/s390/crypto/vfio_ap_private.h
> @@ -81,8 +81,6 @@ struct ap_matrix {
> * @node: allows the ap_matrix_mdev struct to be added to a list
> * @matrix: the adapters, usage domains and control domains assigned to the
> * mediated matrix device.
> - * @group_notifier: notifier block used for specifying callback function for
> - * handling the VFIO_GROUP_NOTIFY_SET_KVM event
> * @iommu_notifier: notifier block used for specifying callback function for
> * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even
> * @kvm: the struct holding guest's state
> @@ -94,7 +92,6 @@ struct ap_matrix_mdev {
> struct vfio_device vdev;
> struct list_head node;
> struct ap_matrix matrix;
> - struct notifier_block group_notifier;
> struct notifier_block iommu_notifier;
> struct kvm *kvm;
> crypto_hook pqap_hook;
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index cfcff7764403..831fc722e3f8 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1083,10 +1083,21 @@ static struct file *vfio_device_open(struct vfio_device *device)
>
> mutex_lock(&device->dev_set->lock);
> device->open_count++;
> - if (device->open_count == 1 && device->ops->open_device) {
> - ret = device->ops->open_device(device);
> - if (ret)
> - goto err_undo_count;
> + if (device->open_count == 1) {
> + /*
> + * Here we pass the KVM pointer with the group under the read
> + * lock. If the device driver will use it, it must obtain a
> + * reference and release it during close_device.
> + */
> + down_read(&device->group->group_rwsem);
> + device->kvm = device->group->kvm;
> +
> + if (device->ops->open_device) {
> + ret = device->ops->open_device(device);
> + if (ret)
> + goto err_undo_count;
> + }
> + up_read(&device->group->group_rwsem);
> }
> mutex_unlock(&device->dev_set->lock);
>
> @@ -1119,10 +1130,14 @@ static struct file *vfio_device_open(struct vfio_device *device)
>
> err_close_device:
> mutex_lock(&device->dev_set->lock);
> + down_read(&device->group->group_rwsem);
> if (device->open_count == 1 && device->ops->close_device)
> device->ops->close_device(device);
> err_undo_count:
> device->open_count--;
> + if (device->open_count == 0 && device->kvm)
> + device->kvm = NULL;
> + up_read(&device->group->group_rwsem);
> mutex_unlock(&device->dev_set->lock);
> module_put(device->dev->driver->owner);
> err_unassign_container:
> @@ -1315,9 +1330,13 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
>
> mutex_lock(&device->dev_set->lock);
> vfio_assert_device_open(device);
> + down_read(&device->group->group_rwsem);
> if (device->open_count == 1 && device->ops->close_device)
> device->ops->close_device(device);
> + up_read(&device->group->group_rwsem);
> device->open_count--;
> + if (device->open_count == 0)
> + device->kvm = NULL;
> mutex_unlock(&device->dev_set->lock);
>
> module_put(device->dev->driver->owner);
> @@ -1726,8 +1745,8 @@ EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
> * @file: VFIO group file
> * @kvm: KVM to link
> *
> - * The kvm pointer will be forwarded to all the vfio_device's attached to the
> - * VFIO file via the VFIO_GROUP_NOTIFY_SET_KVM notifier.
> + * When a VFIO device is first opened the KVM will be available in
> + * device->kvm if one was associated with the group.
> */
> void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
> {
> @@ -1738,8 +1757,6 @@ void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
>
> down_write(&group->group_rwsem);
> group->kvm = kvm;
> - blocking_notifier_call_chain(&group->notifier,
> - VFIO_GROUP_NOTIFY_SET_KVM, kvm);
> up_write(&group->group_rwsem);
> }
> EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
> @@ -2006,7 +2023,8 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
> struct vfio_iommu_driver *driver;
> int ret;
>
> - down_read(&group->group_rwsem);
> + lockdep_assert_held_read(&group->group_rwsem);
> +
> container = group->container;
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->register_notifier))
> @@ -2014,7 +2032,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
> events, nb);
> else
> ret = -ENOTTY;
> - up_read(&group->group_rwsem);
>
> return ret;
> }
> @@ -2026,7 +2043,8 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
> struct vfio_iommu_driver *driver;
> int ret;
>
> - down_read(&group->group_rwsem);
> + lockdep_assert_held_read(&group->group_rwsem);
> +
> container = group->container;
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->unregister_notifier))
> @@ -2034,47 +2052,10 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
> nb);
> else
> ret = -ENOTTY;
> - up_read(&group->group_rwsem);
>
> return ret;
> }
>
> -static int vfio_register_group_notifier(struct vfio_group *group,
> - unsigned long *events,
> - struct notifier_block *nb)
> -{
> - int ret;
> - bool set_kvm = false;
> -
> - if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
> - set_kvm = true;
> -
> - /* clear known events */
> - *events &= ~VFIO_GROUP_NOTIFY_SET_KVM;
> -
> - /* refuse to continue if still events remaining */
> - if (*events)
> - return -EINVAL;
> -
> - ret = blocking_notifier_chain_register(&group->notifier, nb);
> - if (ret)
> - return ret;
> -
> - /*
> - * The attaching of kvm and vfio_group might already happen, so
> - * here we replay once upon registration.
> - */
> - if (set_kvm) {
> - down_read(&group->group_rwsem);
> - if (group->kvm)
> - blocking_notifier_call_chain(&group->notifier,
> - VFIO_GROUP_NOTIFY_SET_KVM,
> - group->kvm);
> - up_read(&group->group_rwsem);
> - }
> - return 0;
> -}
> -
> int vfio_register_notifier(struct vfio_device *device,
> enum vfio_notify_type type, unsigned long *events,
> struct notifier_block *nb)
> @@ -2090,9 +2071,6 @@ int vfio_register_notifier(struct vfio_device *device,
> case VFIO_IOMMU_NOTIFY:
> ret = vfio_register_iommu_notifier(group, events, nb);
> break;
> - case VFIO_GROUP_NOTIFY:
> - ret = vfio_register_group_notifier(group, events, nb);
> - break;
> default:
> ret = -EINVAL;
> }
> @@ -2114,9 +2092,6 @@ int vfio_unregister_notifier(struct vfio_device *device,
> case VFIO_IOMMU_NOTIFY:
> ret = vfio_unregister_iommu_notifier(group, nb);
> break;
> - case VFIO_GROUP_NOTIFY:
> - ret = blocking_notifier_chain_unregister(&group->notifier, nb);
> - break;
> default:
> ret = -EINVAL;
> }
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 45b287826ce6..aa888cc51757 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -36,6 +36,8 @@ struct vfio_device {
> struct vfio_device_set *dev_set;
> struct list_head dev_set_list;
> unsigned int migration_flags;
> + /* Driver must reference the kvm during open_device or never touch it */
> + struct kvm *kvm;
>
> /* Members below here are private, not for driver use */
> refcount_t refcount;
> @@ -155,15 +157,11 @@ extern int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
> /* each type has independent events */
> enum vfio_notify_type {
> VFIO_IOMMU_NOTIFY = 0,
> - VFIO_GROUP_NOTIFY = 1,
> };
>
> /* events for VFIO_IOMMU_NOTIFY */
> #define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
>
> -/* events for VFIO_GROUP_NOTIFY */
> -#define VFIO_GROUP_NOTIFY_SET_KVM BIT(0)
> -
> extern int vfio_register_notifier(struct vfio_device *device,
> enum vfio_notify_type type,
> unsigned long *required_events,


2022-05-25 09:44:45

by Wang, Zhi A

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On 5/23/22 4:41 PM, Alex Williamson wrote:
>
> Hi Zhi & Zhenyu,
>
> Please review gvt changes below, I'd prefer to get your ack included.
> Thanks!
>
> Alex
>
> On Thu, 19 May 2022 14:33:11 -0400
> Matthew Rosato <[email protected]> wrote:
>
>> Rather than relying on a notifier for associating the KVM with
>> the group, let's assume that the association has already been
>> made prior to device_open. The first time a device is opened
>> associate the group KVM with the device.
>>
>> This fixes a user-triggerable oops in GVT.
>>
>> Reviewed-by: Tony Krowiak <[email protected]>
>> Reviewed-by: Kevin Tian <[email protected]>
>> Reviewed-by: Christoph Hellwig <[email protected]>
>> Signed-off-by: Jason Gunthorpe <[email protected]>
>> Signed-off-by: Matthew Rosato <[email protected]>
>> ---
>> drivers/gpu/drm/i915/gvt/gtt.c | 4 +-
>> drivers/gpu/drm/i915/gvt/gvt.h | 3 -
>> drivers/gpu/drm/i915/gvt/kvmgt.c | 82 ++++++--------------------
>> drivers/s390/crypto/vfio_ap_ops.c | 35 ++---------
>> drivers/s390/crypto/vfio_ap_private.h | 3 -
>> drivers/vfio/vfio.c | 83 ++++++++++-----------------
>> include/linux/vfio.h | 6 +-
>> 7 files changed, 57 insertions(+), 159 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
>> index 9c5cc2800975..b4f69364f9a1 100644
>> --- a/drivers/gpu/drm/i915/gvt/gtt.c
>> +++ b/drivers/gpu/drm/i915/gvt/gtt.c
>> @@ -51,7 +51,7 @@ static int preallocated_oos_pages = 8192;
>>
>> static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn)
>> {
>> - struct kvm *kvm = vgpu->kvm;
>> + struct kvm *kvm = vgpu->vfio_device.kvm;
>> int idx;
>> bool ret;
>>
>> @@ -1185,7 +1185,7 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
>>
>> if (!vgpu->attached)
>> return -EINVAL;
>> - pfn = gfn_to_pfn(vgpu->kvm, ops->get_pfn(entry));
>> + pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
>> if (is_error_noslot_pfn(pfn))
>> return -EINVAL;
>> return PageTransHuge(pfn_to_page(pfn));
>> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
>> index 2af4c83e733c..aee1a45da74b 100644
>> --- a/drivers/gpu/drm/i915/gvt/gvt.h
>> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
>> @@ -227,9 +227,6 @@ struct intel_vgpu {
>> struct mutex cache_lock;
>>
>> struct notifier_block iommu_notifier;
>> - struct notifier_block group_notifier;
>> - struct kvm *kvm;
>> - struct work_struct release_work;
>> atomic_t released;
>>
>> struct kvm_page_track_notifier_node track_node;
>> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
>> index 7655ffa97d51..e2f6c56ab342 100644
>> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
>> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
>> @@ -228,8 +228,6 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
>> }
>> }
>>
>> -static void intel_vgpu_release_work(struct work_struct *work);
>> -
>> static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
>> unsigned long size)
>> {
>> @@ -761,23 +759,6 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
>> return NOTIFY_OK;
>> }
>>
>> -static int intel_vgpu_group_notifier(struct notifier_block *nb,
>> - unsigned long action, void *data)
>> -{
>> - struct intel_vgpu *vgpu =
>> - container_of(nb, struct intel_vgpu, group_notifier);
>> -
>> - /* the only action we care about */
>> - if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
>> - vgpu->kvm = data;
>> -
>> - if (!data)
>> - schedule_work(&vgpu->release_work);
>> - }
>> -
>> - return NOTIFY_OK;
>> -}
>> -
>> static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
>> {
>> struct intel_vgpu *itr;
>> @@ -789,7 +770,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
>> if (!itr->attached)
>> continue;
>>
>> - if (vgpu->kvm == itr->kvm) {
>> + if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
>> ret = true;
>> goto out;
>> }
>> @@ -806,7 +787,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
>> int ret;
>>
>> vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
>> - vgpu->group_notifier.notifier_call = intel_vgpu_group_notifier;
>>
>> events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
>> ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
>> @@ -817,38 +797,32 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
>> goto out;
>> }
>>
>> - events = VFIO_GROUP_NOTIFY_SET_KVM;
>> - ret = vfio_register_notifier(vfio_dev, VFIO_GROUP_NOTIFY, &events,
>> - &vgpu->group_notifier);
>> - if (ret != 0) {
>> - gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
>> - ret);
>> - goto undo_iommu;
>> - }
>> -
>> ret = -EEXIST;
>> if (vgpu->attached)
>> - goto undo_register;
>> + goto undo_iommu;
>>
>> ret = -ESRCH;
>> - if (!vgpu->kvm || vgpu->kvm->mm != current->mm) {
>> + if (!vgpu->vfio_device.kvm ||
>> + vgpu->vfio_device.kvm->mm != current->mm) {
>> gvt_vgpu_err("KVM is required to use Intel vGPU\n");
>> - goto undo_register;
>> + goto undo_iommu;
>> }
>>
>> + kvm_get_kvm(vgpu->vfio_device.kvm);
>> +
>> ret = -EEXIST;
>> if (__kvmgt_vgpu_exist(vgpu))
>> - goto undo_register;
>> + goto undo_iommu;
>>
>> vgpu->attached = true;
>> - kvm_get_kvm(vgpu->kvm);
>>
>> kvmgt_protect_table_init(vgpu);
>> gvt_cache_init(vgpu);
>>
>> vgpu->track_node.track_write = kvmgt_page_track_write;
>> vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
>> - kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node);
>> + kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
>> + &vgpu->track_node);
>>
>> debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
>> &vgpu->nr_cache_entries);
>> @@ -858,10 +832,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
>> atomic_set(&vgpu->released, 0);
>> return 0;
>>
>> -undo_register:
>> - vfio_unregister_notifier(vfio_dev, VFIO_GROUP_NOTIFY,
>> - &vgpu->group_notifier);
>> -
>> undo_iommu:
>> vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
>> &vgpu->iommu_notifier);
>> @@ -880,8 +850,9 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
>> }
>> }
>>
>> -static void __intel_vgpu_release(struct intel_vgpu *vgpu)
>> +static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
>> {
>> + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
>> struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
>> int ret;
>>
>> @@ -898,35 +869,19 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
>> drm_WARN(&i915->drm, ret,
>> "vfio_unregister_notifier for iommu failed: %d\n", ret);
>>
>> - ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_GROUP_NOTIFY,
>> - &vgpu->group_notifier);
>> - drm_WARN(&i915->drm, ret,
>> - "vfio_unregister_notifier for group failed: %d\n", ret);
>> -
>> debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
>>
>> - kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node);
>> - kvm_put_kvm(vgpu->kvm);
>> + kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
>> + &vgpu->track_node);
>> kvmgt_protect_table_destroy(vgpu);
>> gvt_cache_destroy(vgpu);
>>
>> intel_vgpu_release_msi_eventfd_ctx(vgpu);
>>
>> - vgpu->kvm = NULL;
>> vgpu->attached = false;
>> -}
>> -
>> -static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
>> -{
>> - __intel_vgpu_release(vfio_dev_to_vgpu(vfio_dev));
>> -}
>> -
>> -static void intel_vgpu_release_work(struct work_struct *work)
>> -{
>> - struct intel_vgpu *vgpu =
>> - container_of(work, struct intel_vgpu, release_work);
>>
>> - __intel_vgpu_release(vgpu);
>> + if (vgpu->vfio_device.kvm)
>> + kvm_put_kvm(vgpu->vfio_device.kvm);
>> }
>>
>> static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
>> @@ -1675,7 +1630,6 @@ static int intel_vgpu_probe(struct mdev_device *mdev)
>> return PTR_ERR(vgpu);
>> }
>>
>> - INIT_WORK(&vgpu->release_work, intel_vgpu_release_work);
>> vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev,
>> &intel_vgpu_dev_ops);
>>
>> @@ -1713,7 +1667,7 @@ static struct mdev_driver intel_vgpu_mdev_driver = {
>>
>> int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
>> {
>> - struct kvm *kvm = info->kvm;
>> + struct kvm *kvm = info->vfio_device.kvm;
>> struct kvm_memory_slot *slot;
>> int idx;
>>
>> @@ -1743,7 +1697,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
>>
>> int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
>> {
>> - struct kvm *kvm = info->kvm;
>> + struct kvm *kvm = info->vfio_device.kvm;
>> struct kvm_memory_slot *slot;
>> int idx;
>>
>> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
>> index e8914024f5b1..a7d2a95796d3 100644
>> --- a/drivers/s390/crypto/vfio_ap_ops.c
>> +++ b/drivers/s390/crypto/vfio_ap_ops.c
>> @@ -1284,25 +1284,6 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
>> }
>> }
>>
>> -static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>> - unsigned long action, void *data)
>> -{
>> - int notify_rc = NOTIFY_OK;
>> - struct ap_matrix_mdev *matrix_mdev;
>> -
>> - if (action != VFIO_GROUP_NOTIFY_SET_KVM)
>> - return NOTIFY_OK;
>> -
>> - matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
>> -
>> - if (!data)
>> - vfio_ap_mdev_unset_kvm(matrix_mdev);
>> - else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
>> - notify_rc = NOTIFY_DONE;
>> -
>> - return notify_rc;
>> -}
>> -
>> static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
>> {
>> struct device *dev;
>> @@ -1402,11 +1383,10 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
>> unsigned long events;
>> int ret;
>>
>> - matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
>> - events = VFIO_GROUP_NOTIFY_SET_KVM;
>> + if (!vdev->kvm)
>> + return -EINVAL;
>>
>> - ret = vfio_register_notifier(vdev, VFIO_GROUP_NOTIFY, &events,
>> - &matrix_mdev->group_notifier);
>> + ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
>> if (ret)
>> return ret;
>>
>> @@ -1415,12 +1395,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
>> ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
>> &matrix_mdev->iommu_notifier);
>> if (ret)
>> - goto out_unregister_group;
>> + goto err_kvm;
>> return 0;
>>
>> -out_unregister_group:
>> - vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
>> - &matrix_mdev->group_notifier);
>> +err_kvm:
>> + vfio_ap_mdev_unset_kvm(matrix_mdev);
>> return ret;
>> }
>>
>> @@ -1431,8 +1410,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
>>
>> vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
>> &matrix_mdev->iommu_notifier);
>> - vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
>> - &matrix_mdev->group_notifier);
>> vfio_ap_mdev_unset_kvm(matrix_mdev);
>> }
>>
>> diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
>> index 648fcaf8104a..a26efd804d0d 100644
>> --- a/drivers/s390/crypto/vfio_ap_private.h
>> +++ b/drivers/s390/crypto/vfio_ap_private.h
>> @@ -81,8 +81,6 @@ struct ap_matrix {
>> * @node: allows the ap_matrix_mdev struct to be added to a list
>> * @matrix: the adapters, usage domains and control domains assigned to the
>> * mediated matrix device.
>> - * @group_notifier: notifier block used for specifying callback function for
>> - * handling the VFIO_GROUP_NOTIFY_SET_KVM event
>> * @iommu_notifier: notifier block used for specifying callback function for
>> * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even
>> * @kvm: the struct holding guest's state
>> @@ -94,7 +92,6 @@ struct ap_matrix_mdev {
>> struct vfio_device vdev;
>> struct list_head node;
>> struct ap_matrix matrix;
>> - struct notifier_block group_notifier;
>> struct notifier_block iommu_notifier;
>> struct kvm *kvm;
>> crypto_hook pqap_hook;
>> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
>> index cfcff7764403..831fc722e3f8 100644
>> --- a/drivers/vfio/vfio.c
>> +++ b/drivers/vfio/vfio.c
>> @@ -1083,10 +1083,21 @@ static struct file *vfio_device_open(struct vfio_device *device)
>>
>> mutex_lock(&device->dev_set->lock);
>> device->open_count++;
>> - if (device->open_count == 1 && device->ops->open_device) {
>> - ret = device->ops->open_device(device);
>> - if (ret)
>> - goto err_undo_count;
>> + if (device->open_count == 1) {
>> + /*
>> + * Here we pass the KVM pointer with the group under the read
>> + * lock. If the device driver will use it, it must obtain a
>> + * reference and release it during close_device.
>> + */
>> + down_read(&device->group->group_rwsem);
>> + device->kvm = device->group->kvm;
>> +
>> + if (device->ops->open_device) {
>> + ret = device->ops->open_device(device);
>> + if (ret)
>> + goto err_undo_count;
>> + }
>> + up_read(&device->group->group_rwsem);
>> }
>> mutex_unlock(&device->dev_set->lock);
>>
>> @@ -1119,10 +1130,14 @@ static struct file *vfio_device_open(struct vfio_device *device)
>>
>> err_close_device:
>> mutex_lock(&device->dev_set->lock);
>> + down_read(&device->group->group_rwsem);
>> if (device->open_count == 1 && device->ops->close_device)
>> device->ops->close_device(device);
>> err_undo_count:
>> device->open_count--;
>> + if (device->open_count == 0 && device->kvm)
>> + device->kvm = NULL;
>> + up_read(&device->group->group_rwsem);
>> mutex_unlock(&device->dev_set->lock);
>> module_put(device->dev->driver->owner);
>> err_unassign_container:
>> @@ -1315,9 +1330,13 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
>>
>> mutex_lock(&device->dev_set->lock);
>> vfio_assert_device_open(device);
>> + down_read(&device->group->group_rwsem);
>> if (device->open_count == 1 && device->ops->close_device)
>> device->ops->close_device(device);
>> + up_read(&device->group->group_rwsem);
>> device->open_count--;
>> + if (device->open_count == 0)
>> + device->kvm = NULL;
>> mutex_unlock(&device->dev_set->lock);
>>
>> module_put(device->dev->driver->owner);
>> @@ -1726,8 +1745,8 @@ EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
>> * @file: VFIO group file
>> * @kvm: KVM to link
>> *
>> - * The kvm pointer will be forwarded to all the vfio_device's attached to the
>> - * VFIO file via the VFIO_GROUP_NOTIFY_SET_KVM notifier.
>> + * When a VFIO device is first opened the KVM will be available in
>> + * device->kvm if one was associated with the group.
>> */
>> void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
>> {
>> @@ -1738,8 +1757,6 @@ void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
>>
>> down_write(&group->group_rwsem);
>> group->kvm = kvm;
>> - blocking_notifier_call_chain(&group->notifier,
>> - VFIO_GROUP_NOTIFY_SET_KVM, kvm);
>> up_write(&group->group_rwsem);
>> }
>> EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
>> @@ -2006,7 +2023,8 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
>> struct vfio_iommu_driver *driver;
>> int ret;
>>
>> - down_read(&group->group_rwsem);
>> + lockdep_assert_held_read(&group->group_rwsem);
>> +
>> container = group->container;
>> driver = container->iommu_driver;
>> if (likely(driver && driver->ops->register_notifier))
>> @@ -2014,7 +2032,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
>> events, nb);
>> else
>> ret = -ENOTTY;
>> - up_read(&group->group_rwsem);
>>
>> return ret;
>> }
>> @@ -2026,7 +2043,8 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
>> struct vfio_iommu_driver *driver;
>> int ret;
>>
>> - down_read(&group->group_rwsem);
>> + lockdep_assert_held_read(&group->group_rwsem);
>> +
>> container = group->container;
>> driver = container->iommu_driver;
>> if (likely(driver && driver->ops->unregister_notifier))
>> @@ -2034,47 +2052,10 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
>> nb);
>> else
>> ret = -ENOTTY;
>> - up_read(&group->group_rwsem);
>>
>> return ret;
>> }
>>
>> -static int vfio_register_group_notifier(struct vfio_group *group,
>> - unsigned long *events,
>> - struct notifier_block *nb)
>> -{
>> - int ret;
>> - bool set_kvm = false;
>> -
>> - if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
>> - set_kvm = true;
>> -
>> - /* clear known events */
>> - *events &= ~VFIO_GROUP_NOTIFY_SET_KVM;
>> -
>> - /* refuse to continue if still events remaining */
>> - if (*events)
>> - return -EINVAL;
>> -
>> - ret = blocking_notifier_chain_register(&group->notifier, nb);
>> - if (ret)
>> - return ret;
>> -
>> - /*
>> - * The attaching of kvm and vfio_group might already happen, so
>> - * here we replay once upon registration.
>> - */
>> - if (set_kvm) {
>> - down_read(&group->group_rwsem);
>> - if (group->kvm)
>> - blocking_notifier_call_chain(&group->notifier,
>> - VFIO_GROUP_NOTIFY_SET_KVM,
>> - group->kvm);
>> - up_read(&group->group_rwsem);
>> - }
>> - return 0;
>> -}
>> -
>> int vfio_register_notifier(struct vfio_device *device,
>> enum vfio_notify_type type, unsigned long *events,
>> struct notifier_block *nb)
>> @@ -2090,9 +2071,6 @@ int vfio_register_notifier(struct vfio_device *device,
>> case VFIO_IOMMU_NOTIFY:
>> ret = vfio_register_iommu_notifier(group, events, nb);
>> break;
>> - case VFIO_GROUP_NOTIFY:
>> - ret = vfio_register_group_notifier(group, events, nb);
>> - break;
>> default:
>> ret = -EINVAL;
>> }
>> @@ -2114,9 +2092,6 @@ int vfio_unregister_notifier(struct vfio_device *device,
>> case VFIO_IOMMU_NOTIFY:
>> ret = vfio_unregister_iommu_notifier(group, nb);
>> break;
>> - case VFIO_GROUP_NOTIFY:
>> - ret = blocking_notifier_chain_unregister(&group->notifier, nb);
>> - break;
>> default:
>> ret = -EINVAL;
>> }
>> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
>> index 45b287826ce6..aa888cc51757 100644
>> --- a/include/linux/vfio.h
>> +++ b/include/linux/vfio.h
>> @@ -36,6 +36,8 @@ struct vfio_device {
>> struct vfio_device_set *dev_set;
>> struct list_head dev_set_list;
>> unsigned int migration_flags;
>> + /* Driver must reference the kvm during open_device or never touch it */
>> + struct kvm *kvm;
>>
>> /* Members below here are private, not for driver use */
>> refcount_t refcount;
>> @@ -155,15 +157,11 @@ extern int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
>> /* each type has independent events */
>> enum vfio_notify_type {
>> VFIO_IOMMU_NOTIFY = 0,
>> - VFIO_GROUP_NOTIFY = 1,
>> };
>>
>> /* events for VFIO_IOMMU_NOTIFY */
>> #define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
>>
>> -/* events for VFIO_GROUP_NOTIFY */
>> -#define VFIO_GROUP_NOTIFY_SET_KVM BIT(0)
>> -
>> extern int vfio_register_notifier(struct vfio_device *device,
>> enum vfio_notify_type type,
>> unsigned long *required_events,
>
Acked-by: Zhi Wang <[email protected]>

2023-01-05 22:21:04

by Alex Williamson

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On Thu, 19 May 2022 14:33:11 -0400
Matthew Rosato <[email protected]> wrote:

> Rather than relying on a notifier for associating the KVM with
> the group, let's assume that the association has already been
> made prior to device_open. The first time a device is opened
> associate the group KVM with the device.
>
> This fixes a user-triggerable oops in GVT.

It seems this has traded an oops for a deadlock, which still exists
today in both GVT-g and vfio-ap. These are the only vfio drivers that
care about kvm, so they make use of kvm_{get,put}_kvm(), where the
latter is called by their .close_device() callbacks.

.close_device() is called while holding group->group_lock (or, at the
time of this commit, group->group_rwsem). The remaining call chain looks
like this:

kvm_put_kvm
 -> kvm_destroy_vm
  -> kvm_destroy_devices
   -> kvm_vfio_destroy
    -> kvm_vfio_file_set_kvm
     -> vfio_file_set_kvm
      -> group->group_lock/group_rwsem

Any suggestions for a fix? Thanks,

Alex

2023-01-05 23:37:56

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On Thu, Jan 05, 2023 at 03:09:30PM -0700, Alex Williamson wrote:
> On Thu, 19 May 2022 14:33:11 -0400
> Matthew Rosato <[email protected]> wrote:
>
> > Rather than relying on a notifier for associating the KVM with
> > the group, let's assume that the association has already been
> > made prior to device_open. The first time a device is opened
> > associate the group KVM with the device.
> >
> > This fixes a user-triggerable oops in GVT.
>
> It seems this has traded an oops for a deadlock, which still exists
> today in both GVT-g and vfio-ap. These are the only vfio drivers that
> care about kvm, so they make use of kvm_{get,put}_kvm(), where the
> latter is called by their .close_device() callbacks.

Bleck

It is pretty common to run the final part of 'put' from a workqueue
specifically to avoid stuff like this, eg fput does it

Maybe that is the simplest?

Jason

2023-01-06 00:37:58

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On Thu, Jan 05, 2023 at 07:16:37PM -0500, Matthew Rosato wrote:

> Yeah, this is also what I was thinking, replace the direct
> kvm_put_kvm calls with, say, schedule_delayed_work in each driver,
> where the delayed task just does the kvm_put_kvm (along with a brief
> comment explaining why we handle the put asynchronously).

Don't put that in every driver, do something like mmput_async() where
the core code has all of this.

> Other than that.. The goal of this patch originally was to get the
> kvm reference at first open_device and release it with the very last
> close_device, so the only other option I could think of would be to
> take the responsibility back from the vfio drivers and do the
> kvm_get_kvm and kvm_put_kvm directly in vfio_main after dropping the
> (but that would result in some ugly symbol linkage and would acquire
> kvm references that a driver maybe does not care about so I don't
> really like that idea)

And we still have the deadlock problem anyhow..

Jason

2023-01-06 00:38:14

by Matthew Rosato

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On 1/5/23 6:34 PM, Jason Gunthorpe wrote:
> On Thu, Jan 05, 2023 at 03:09:30PM -0700, Alex Williamson wrote:
>> On Thu, 19 May 2022 14:33:11 -0400
>> Matthew Rosato <[email protected]> wrote:
>>
>>> Rather than relying on a notifier for associating the KVM with
>>> the group, let's assume that the association has already been
>>> made prior to device_open. The first time a device is opened
>>> associate the group KVM with the device.
>>>
>>> This fixes a user-triggerable oops in GVT.
>>
>> It seems this has traded an oops for a deadlock, which still exists
>> today in both GVT-g and vfio-ap. These are the only vfio drivers that
>> care about kvm, so they make use of kvm_{get,put}_kvm(), where the

vfio-pci-zdev also

>> latter is called by their .close_device() callbacks.

Huh, I've never seen this deadlock with vfio-pci-zdev or vfio-ap, but I see what you're saying... I guess it's not seen under typical circumstances with QEMU because kvm_vfio_group_del would have already been triggered via KVM_DEV_VFIO_GROUP_DEL by the time we close the device, such that the group would not be found during the kvm_vfio_destroy call? (I'm not at all suggesting that we should rely on userspace behaving in this order, just wondering why I never saw it while testing)

>
> Bleck
>
> It is pretty common to run the final part of 'put' from a workqueue
> specifically to avoid stuff like this, eg fput does it
>
> Maybe that is the simplest?

Yeah, this is also what I was thinking, replace the direct kvm_put_kvm calls with, say, schedule_delayed_work in each driver, where the delayed task just does the kvm_put_kvm (along with a brief comment explaining why we handle the put asynchronously).

Other than that... The goal of this patch originally was to get the kvm reference at first open_device and release it with the very last close_device, so the only other option I could think of would be to take the responsibility back from the vfio drivers and do the kvm_get_kvm and kvm_put_kvm directly in vfio_main after dropping the group lock (but that would result in some ugly symbol linkage and would acquire kvm references that a driver maybe does not care about, so I don't really like that idea).

2023-01-06 01:14:59

by Matthew Rosato

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On 1/5/23 7:32 PM, Jason Gunthorpe wrote:
> On Thu, Jan 05, 2023 at 07:16:37PM -0500, Matthew Rosato wrote:
>
>> Yeah, this is also what I was thinking, replace the direct
>> kvm_put_kvm calls with, say, schedule_delayed_work in each driver,
>> where the delayed task just does the kvm_put_kvm (along with a brief
>> comment explaining why we handle the put asynchronously).
>
> Don't put that in every driver, do something like mmput_async() where
> the core code has all of this.
>

If the core vfio code were to add logic to invoke kvm_put_kvm and kvm_get_kvm, won't this introduce a vfio dependency on kvm? If I recall, we have the drivers handling the kvm reference today in order to avoid that..

>> Other than that.. The goal of this patch originally was to get the
>> kvm reference at first open_device and release it with the very last
>> close_device, so the only other option I could think of would be to
>> take the responsibility back from the vfio drivers and do the
>> kvm_get_kvm and kvm_put_kvm directly in vfio_main after dropping the
>> (but that would result in some ugly symbol linkage and would acquire
>> kvm references that a driver maybe does not care about so I don't
>> really like that idea)
>
> And we still have the deadlock problem anyhow..

Looks like I never finished my sentence here -- I meant to call kvm_put_kvm directly in vfio_main after dropping the group lock (e.g. when we set device->kvm = NULL;). But I think we'd still have the kvm dependency issue.


2023-01-06 15:05:50

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

On Thu, Jan 05, 2023 at 08:03:32PM -0500, Matthew Rosato wrote:
> On 1/5/23 7:32 PM, Jason Gunthorpe wrote:
> > On Thu, Jan 05, 2023 at 07:16:37PM -0500, Matthew Rosato wrote:
> >
> >> Yeah, this is also what I was thinking, replace the direct
> >> kvm_put_kvm calls with, say, schedule_delayed_work in each driver,
> >> where the delayed task just does the kvm_put_kvm (along with a brief
> >> comment explaining why we handle the put asynchronously).
> >
> > Don't put that in every driver, do something like mmput_async() where
> > the core code has all of this.
> >
>
> If the core vfio code were to add logic to invoke kvm_put_kvm and
> kvm_get_kvm, won't this introduce a vfio dependency on kvm? If I
> recall, we have the drivers handling the kvm reference today in
> order to avoid that..

Not in vfio, put it in kvm 'kvm_put_async()'

Jason