The original intent of vfio_container.group_lock is to protect
vfio_container.group_list, however over time it's become a crutch to
prevent changes in container composition any time we call into the
iommu driver backend. This introduces problems when we start to have
more complex interactions, for example when a user's DMA unmap request
triggers a notification to an mdev vendor driver, who responds by
attempting to unpin mappings within that request, re-entering the
iommu backend. We incorrectly assume that the use of read-locks here
allows for this nested locking behavior, but a poorly timed write-lock
could in fact trigger a deadlock.
The current use of group_lock seems to fall into the trap of locking
code, not data. Correct that by removing uses of group_lock that are
not directly related to group_list. Note that the vfio type1 iommu
backend has its own mutex, vfio_iommu.lock, which it uses to protect
itself for each of these interfaces anyway. The group_lock appears to
be redundant for these interfaces, and type1 even goes so far as to
release its mutex to allow for exactly the re-entrant code path above.
Reported-by: Chuanxiao Dong <[email protected]>
Signed-off-by: Alex Williamson <[email protected]>
---
Alexey, does the SPAPR/TCE iommu backend have any dependencies on this
lock? If so, let's create a lock in the spapr_tce backend like we
have in type1 to handle it. I believe the ioctl passthrough is the
only interface that can reach spapr_tce. Thanks,
Alex
drivers/vfio/vfio.c | 38 --------------------------------------
1 file changed, 38 deletions(-)
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 7597a377eb4e..330d50582f40 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1175,15 +1175,11 @@ static long vfio_fops_unl_ioctl(struct file *filep,
ret = vfio_ioctl_set_iommu(container, arg);
break;
default:
- down_read(&container->group_lock);
-
driver = container->iommu_driver;
data = container->iommu_data;
if (driver) /* passthrough all unrecognized ioctls */
ret = driver->ops->ioctl(data, cmd, arg);
-
- up_read(&container->group_lock);
}
return ret;
@@ -1237,15 +1233,11 @@ static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
struct vfio_iommu_driver *driver;
ssize_t ret = -EINVAL;
- down_read(&container->group_lock);
-
driver = container->iommu_driver;
if (likely(driver && driver->ops->read))
ret = driver->ops->read(container->iommu_data,
buf, count, ppos);
- up_read(&container->group_lock);
-
return ret;
}
@@ -1256,15 +1248,11 @@ static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
struct vfio_iommu_driver *driver;
ssize_t ret = -EINVAL;
- down_read(&container->group_lock);
-
driver = container->iommu_driver;
if (likely(driver && driver->ops->write))
ret = driver->ops->write(container->iommu_data,
buf, count, ppos);
- up_read(&container->group_lock);
-
return ret;
}
@@ -1274,14 +1262,10 @@ static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
struct vfio_iommu_driver *driver;
int ret = -EINVAL;
- down_read(&container->group_lock);
-
driver = container->iommu_driver;
if (likely(driver && driver->ops->mmap))
ret = driver->ops->mmap(container->iommu_data, vma);
- up_read(&container->group_lock);
-
return ret;
}
@@ -1993,8 +1977,6 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
goto err_pin_pages;
container = group->container;
- down_read(&container->group_lock);
-
driver = container->iommu_driver;
if (likely(driver && driver->ops->pin_pages))
ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
@@ -2002,7 +1984,6 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
else
ret = -ENOTTY;
- up_read(&container->group_lock);
vfio_group_try_dissolve_container(group);
err_pin_pages:
@@ -2042,8 +2023,6 @@ int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
goto err_unpin_pages;
container = group->container;
- down_read(&container->group_lock);
-
driver = container->iommu_driver;
if (likely(driver && driver->ops->unpin_pages))
ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
@@ -2051,7 +2030,6 @@ int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
else
ret = -ENOTTY;
- up_read(&container->group_lock);
vfio_group_try_dissolve_container(group);
err_unpin_pages:
@@ -2073,8 +2051,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
return -EINVAL;
container = group->container;
- down_read(&container->group_lock);
-
driver = container->iommu_driver;
if (likely(driver && driver->ops->register_notifier))
ret = driver->ops->register_notifier(container->iommu_data,
@@ -2082,7 +2058,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
else
ret = -ENOTTY;
- up_read(&container->group_lock);
vfio_group_try_dissolve_container(group);
return ret;
@@ -2100,8 +2075,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
return -EINVAL;
container = group->container;
- down_read(&container->group_lock);
-
driver = container->iommu_driver;
if (likely(driver && driver->ops->unregister_notifier))
ret = driver->ops->unregister_notifier(container->iommu_data,
@@ -2109,7 +2082,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
else
ret = -ENOTTY;
- up_read(&container->group_lock);
vfio_group_try_dissolve_container(group);
return ret;
@@ -2127,7 +2099,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
unsigned long *events,
struct notifier_block *nb)
{
- struct vfio_container *container;
int ret;
bool set_kvm = false;
@@ -2145,9 +2116,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
if (ret)
return -EINVAL;
- container = group->container;
- down_read(&container->group_lock);
-
ret = blocking_notifier_chain_register(&group->notifier, nb);
/*
@@ -2158,7 +2126,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
blocking_notifier_call_chain(&group->notifier,
VFIO_GROUP_NOTIFY_SET_KVM, group->kvm);
- up_read(&container->group_lock);
vfio_group_try_dissolve_container(group);
return ret;
@@ -2167,19 +2134,14 @@ static int vfio_register_group_notifier(struct vfio_group *group,
static int vfio_unregister_group_notifier(struct vfio_group *group,
struct notifier_block *nb)
{
- struct vfio_container *container;
int ret;
ret = vfio_group_add_container_user(group);
if (ret)
return -EINVAL;
- container = group->container;
- down_read(&container->group_lock);
-
ret = blocking_notifier_chain_unregister(&group->notifier, nb);
- up_read(&container->group_lock);
vfio_group_try_dissolve_container(group);
return ret;
On 08/07/17 08:15, Alex Williamson wrote:
> The original intent of vfio_container.group_lock is to protect
> vfio_container.group_list, however over time it's become a crutch to
> prevent changes in container composition any time we call into the
> iommu driver backend. This introduces problems when we start to have
> more complex interactions, for example when a user's DMA unmap request
> triggers a notification to an mdev vendor driver, who responds by
> attempting to unpin mappings within that request, re-entering the
> iommu backend. We incorrectly assume that the use of read-locks here
> allow for this nested locking behavior, but a poorly timed write-lock
> could in fact trigger a deadlock.
>
> The current use of group_lock seems to fall into the trap of locking
> code, not data. Correct that by removing uses of group_lock that are
> not directly related to group_list. Note that the vfio type1 iommu
> backend has its own mutex, vfio_iommu.lock, which it uses to protect
> itself for each of these interfaces anyway. The group_lock appears to
> be a redundancy for these interfaces and type1 even goes so far as to
> release its mutex to allow for exactly the re-entrant code path above.
>
> Reported-by: Chuanxiao Dong <[email protected]>
> Signed-off-by: Alex Williamson <[email protected]>
> ---
>
> Alexey, does the SPAPR/TCE iommu backend have any dependencies on this
> lock? If so, let's create a lock in the spapr_tce backend like we
> have in type1 to handle it.
There is one already - tce_container::lock.
> I believe the ioctl passthrough is the
> only interface that can reach spapr_tce.
There are also vfio_iommu_driver_ops::attach_group/detach_group, but these
also use tce_container::lock, so spapr is going to be fine.
Acked-by: Alexey Kardashevskiy <[email protected]>
> Thanks,
>
> Alex
>
> drivers/vfio/vfio.c | 38 --------------------------------------
> 1 file changed, 38 deletions(-)
>
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 7597a377eb4e..330d50582f40 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1175,15 +1175,11 @@ static long vfio_fops_unl_ioctl(struct file *filep,
> ret = vfio_ioctl_set_iommu(container, arg);
> break;
> default:
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> data = container->iommu_data;
>
> if (driver) /* passthrough all unrecognized ioctls */
> ret = driver->ops->ioctl(data, cmd, arg);
> -
> - up_read(&container->group_lock);
> }
>
> return ret;
> @@ -1237,15 +1233,11 @@ static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
> struct vfio_iommu_driver *driver;
> ssize_t ret = -EINVAL;
>
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->read))
> ret = driver->ops->read(container->iommu_data,
> buf, count, ppos);
>
> - up_read(&container->group_lock);
> -
> return ret;
> }
>
> @@ -1256,15 +1248,11 @@ static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
> struct vfio_iommu_driver *driver;
> ssize_t ret = -EINVAL;
>
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->write))
> ret = driver->ops->write(container->iommu_data,
> buf, count, ppos);
>
> - up_read(&container->group_lock);
> -
> return ret;
> }
>
> @@ -1274,14 +1262,10 @@ static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
> struct vfio_iommu_driver *driver;
> int ret = -EINVAL;
>
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->mmap))
> ret = driver->ops->mmap(container->iommu_data, vma);
>
> - up_read(&container->group_lock);
> -
> return ret;
> }
>
> @@ -1993,8 +1977,6 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
> goto err_pin_pages;
>
> container = group->container;
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->pin_pages))
> ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
> @@ -2002,7 +1984,6 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
> else
> ret = -ENOTTY;
>
> - up_read(&container->group_lock);
> vfio_group_try_dissolve_container(group);
>
> err_pin_pages:
> @@ -2042,8 +2023,6 @@ int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
> goto err_unpin_pages;
>
> container = group->container;
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->unpin_pages))
> ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
> @@ -2051,7 +2030,6 @@ int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
> else
> ret = -ENOTTY;
>
> - up_read(&container->group_lock);
> vfio_group_try_dissolve_container(group);
>
> err_unpin_pages:
> @@ -2073,8 +2051,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
> return -EINVAL;
>
> container = group->container;
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->register_notifier))
> ret = driver->ops->register_notifier(container->iommu_data,
> @@ -2082,7 +2058,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
> else
> ret = -ENOTTY;
>
> - up_read(&container->group_lock);
> vfio_group_try_dissolve_container(group);
>
> return ret;
> @@ -2100,8 +2075,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
> return -EINVAL;
>
> container = group->container;
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->unregister_notifier))
> ret = driver->ops->unregister_notifier(container->iommu_data,
> @@ -2109,7 +2082,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
> else
> ret = -ENOTTY;
>
> - up_read(&container->group_lock);
> vfio_group_try_dissolve_container(group);
>
> return ret;
> @@ -2127,7 +2099,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
> unsigned long *events,
> struct notifier_block *nb)
> {
> - struct vfio_container *container;
> int ret;
> bool set_kvm = false;
>
> @@ -2145,9 +2116,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
> if (ret)
> return -EINVAL;
>
> - container = group->container;
> - down_read(&container->group_lock);
> -
> ret = blocking_notifier_chain_register(&group->notifier, nb);
>
> /*
> @@ -2158,7 +2126,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
> blocking_notifier_call_chain(&group->notifier,
> VFIO_GROUP_NOTIFY_SET_KVM, group->kvm);
>
> - up_read(&container->group_lock);
> vfio_group_try_dissolve_container(group);
>
> return ret;
> @@ -2167,19 +2134,14 @@ static int vfio_register_group_notifier(struct vfio_group *group,
> static int vfio_unregister_group_notifier(struct vfio_group *group,
> struct notifier_block *nb)
> {
> - struct vfio_container *container;
> int ret;
>
> ret = vfio_group_add_container_user(group);
> if (ret)
> return -EINVAL;
>
> - container = group->container;
> - down_read(&container->group_lock);
> -
> ret = blocking_notifier_chain_unregister(&group->notifier, nb);
>
> - up_read(&container->group_lock);
> vfio_group_try_dissolve_container(group);
>
> return ret;
>
--
Alexey
Sounds reasonable to me.
Thanks,
Kirti
On 7/8/2017 3:45 AM, Alex Williamson wrote:
> The original intent of vfio_container.group_lock is to protect
> vfio_container.group_list, however over time it's become a crutch to
> prevent changes in container composition any time we call into the
> iommu driver backend. This introduces problems when we start to have
> more complex interactions, for example when a user's DMA unmap request
> triggers a notification to an mdev vendor driver, who responds by
> attempting to unpin mappings within that request, re-entering the
> iommu backend. We incorrectly assume that the use of read-locks here
> allow for this nested locking behavior, but a poorly timed write-lock
> could in fact trigger a deadlock.
>
> The current use of group_lock seems to fall into the trap of locking
> code, not data. Correct that by removing uses of group_lock that are
> not directly related to group_list. Note that the vfio type1 iommu
> backend has its own mutex, vfio_iommu.lock, which it uses to protect
> itself for each of these interfaces anyway. The group_lock appears to
> be a redundancy for these interfaces and type1 even goes so far as to
> release its mutex to allow for exactly the re-entrant code path above.
>
> Reported-by: Chuanxiao Dong <[email protected]>
> Signed-off-by: Alex Williamson <[email protected]>
> ---
>
> Alexey, does the SPAPR/TCE iommu backend have any dependencies on this
> lock? If so, let's create a lock in the spapr_tce backend like we
> have in type1 to handle it. I believe the ioctl passthrough is the
> only interface that can reach spapr_tce. Thanks,
>
> Alex
>
> drivers/vfio/vfio.c | 38 --------------------------------------
> 1 file changed, 38 deletions(-)
>
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 7597a377eb4e..330d50582f40 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1175,15 +1175,11 @@ static long vfio_fops_unl_ioctl(struct file *filep,
> ret = vfio_ioctl_set_iommu(container, arg);
> break;
> default:
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> data = container->iommu_data;
>
> if (driver) /* passthrough all unrecognized ioctls */
> ret = driver->ops->ioctl(data, cmd, arg);
> -
> - up_read(&container->group_lock);
> }
>
> return ret;
> @@ -1237,15 +1233,11 @@ static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
> struct vfio_iommu_driver *driver;
> ssize_t ret = -EINVAL;
>
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->read))
> ret = driver->ops->read(container->iommu_data,
> buf, count, ppos);
>
> - up_read(&container->group_lock);
> -
> return ret;
> }
>
> @@ -1256,15 +1248,11 @@ static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
> struct vfio_iommu_driver *driver;
> ssize_t ret = -EINVAL;
>
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->write))
> ret = driver->ops->write(container->iommu_data,
> buf, count, ppos);
>
> - up_read(&container->group_lock);
> -
> return ret;
> }
>
> @@ -1274,14 +1262,10 @@ static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
> struct vfio_iommu_driver *driver;
> int ret = -EINVAL;
>
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->mmap))
> ret = driver->ops->mmap(container->iommu_data, vma);
>
> - up_read(&container->group_lock);
> -
> return ret;
> }
>
> @@ -1993,8 +1977,6 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
> goto err_pin_pages;
>
> container = group->container;
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->pin_pages))
> ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
> @@ -2002,7 +1984,6 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
> else
> ret = -ENOTTY;
>
> - up_read(&container->group_lock);
> vfio_group_try_dissolve_container(group);
>
> err_pin_pages:
> @@ -2042,8 +2023,6 @@ int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
> goto err_unpin_pages;
>
> container = group->container;
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->unpin_pages))
> ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
> @@ -2051,7 +2030,6 @@ int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
> else
> ret = -ENOTTY;
>
> - up_read(&container->group_lock);
> vfio_group_try_dissolve_container(group);
>
> err_unpin_pages:
> @@ -2073,8 +2051,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
> return -EINVAL;
>
> container = group->container;
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->register_notifier))
> ret = driver->ops->register_notifier(container->iommu_data,
> @@ -2082,7 +2058,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
> else
> ret = -ENOTTY;
>
> - up_read(&container->group_lock);
> vfio_group_try_dissolve_container(group);
>
> return ret;
> @@ -2100,8 +2075,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
> return -EINVAL;
>
> container = group->container;
> - down_read(&container->group_lock);
> -
> driver = container->iommu_driver;
> if (likely(driver && driver->ops->unregister_notifier))
> ret = driver->ops->unregister_notifier(container->iommu_data,
> @@ -2109,7 +2082,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
> else
> ret = -ENOTTY;
>
> - up_read(&container->group_lock);
> vfio_group_try_dissolve_container(group);
>
> return ret;
> @@ -2127,7 +2099,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
> unsigned long *events,
> struct notifier_block *nb)
> {
> - struct vfio_container *container;
> int ret;
> bool set_kvm = false;
>
> @@ -2145,9 +2116,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
> if (ret)
> return -EINVAL;
>
> - container = group->container;
> - down_read(&container->group_lock);
> -
> ret = blocking_notifier_chain_register(&group->notifier, nb);
>
> /*
> @@ -2158,7 +2126,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
> blocking_notifier_call_chain(&group->notifier,
> VFIO_GROUP_NOTIFY_SET_KVM, group->kvm);
>
> - up_read(&container->group_lock);
> vfio_group_try_dissolve_container(group);
>
> return ret;
> @@ -2167,19 +2134,14 @@ static int vfio_register_group_notifier(struct vfio_group *group,
> static int vfio_unregister_group_notifier(struct vfio_group *group,
> struct notifier_block *nb)
> {
> - struct vfio_container *container;
> int ret;
>
> ret = vfio_group_add_container_user(group);
> if (ret)
> return -EINVAL;
>
> - container = group->container;
> - down_read(&container->group_lock);
> -
> ret = blocking_notifier_chain_unregister(&group->notifier, nb);
>
> - up_read(&container->group_lock);
> vfio_group_try_dissolve_container(group);
>
> return ret;
>