LinuxLists.cc - [PATCH] drm/amdkfd: fix potential kgd_mem UAFs

2023-03-08 21:39:35

Subject: [PATCH] drm/amdkfd: fix potential kgd_mem UAFs

kgd_mem should be accessed with p->mutex locked, or it could have been
freed by kfd_ioctl_free_memory_of_gpu.

Signed-off-by: Chia-I Wu <[email protected]>
---
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6d291aa6386bd..3c630114210d6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1293,14 +1293,14 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
args->n_success = i+1;
}

- mutex_unlock(&p->mutex);
-
err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
goto sync_memory_failed;
}

+ mutex_unlock(&p->mutex);
+
/* Flush TLBs after waiting for the page table updates to complete */
for (i = 0; i < args->n_devices; i++) {
peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
@@ -1316,9 +1316,9 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
+sync_memory_failed:
mutex_unlock(&p->mutex);
copy_from_user_failed:
-sync_memory_failed:
kfree(devices_arr);

return err;
@@ -1332,6 +1332,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
void *mem;
long err = 0;
uint32_t *devices_arr = NULL, i;
+ bool flush_tlb;

if (!args->n_devices) {
pr_debug("Device IDs array empty\n");
@@ -1384,16 +1385,19 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
}
args->n_success = i+1;
}
- mutex_unlock(&p->mutex);

- if (kfd_flush_tlb_after_unmap(pdd->dev)) {
+ flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev);
+ if (flush_tlb) {
err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
(struct kgd_mem *) mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
goto sync_memory_failed;
}
+ }
+ mutex_unlock(&p->mutex);

+ if (flush_tlb) {
/* Flush TLBs after waiting for the page table updates to complete */
for (i = 0; i < args->n_devices; i++) {
peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
@@ -1409,9 +1413,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
+sync_memory_failed:
mutex_unlock(&p->mutex);
copy_from_user_failed:
-sync_memory_failed:
kfree(devices_arr);
return err;
}
--
2.40.0.rc1.284.g88254d51c5-goog

2023-03-09 17:49:41

by Felix Kuehling

[permalink] [raw]

Subject: Re: [PATCH] drm/amdkfd: fix potential kgd_mem UAFs

Am 2023-03-08 um 16:37 schrieb Chia-I Wu:
> kgd_mem should be accessed with p->mutex locked, or it could have been
> freed by kfd_ioctl_free_memory_of_gpu.

Thank you for the patch. It's not just about accessing kgd_mem with
p->mutex held. It's also about holding the mutex continuously. I'd
update the description to be more explicit about the invariant being
broken here:

kgd_mem pointers returned by kfd_process_device_translate_handle are
only guaranteed to be valid while p->mutex is held. As soon as the mutex
is unlocked, another thread can free the BO.

I can update the description and submit the patch.

Reviewed-by: Felix Kuehling <[email protected]>

Regards,
Felix

>
> Signed-off-by: Chia-I Wu <[email protected]>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 16 ++++++++++------
> 1 file changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 6d291aa6386bd..3c630114210d6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1293,14 +1293,14 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
> args->n_success = i+1;
> }
>
> - mutex_unlock(&p->mutex);
> -
> err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);
> if (err) {
> pr_debug("Sync memory failed, wait interrupted by user signal\n");
> goto sync_memory_failed;
> }
>
> + mutex_unlock(&p->mutex);
> +
> /* Flush TLBs after waiting for the page table updates to complete */
> for (i = 0; i < args->n_devices; i++) {
> peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
> @@ -1316,9 +1316,9 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
> bind_process_to_device_failed:
> get_mem_obj_from_handle_failed:
> map_memory_to_gpu_failed:
> +sync_memory_failed:
> mutex_unlock(&p->mutex);
> copy_from_user_failed:
> -sync_memory_failed:
> kfree(devices_arr);
>
> return err;
> @@ -1332,6 +1332,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
> void *mem;
> long err = 0;
> uint32_t *devices_arr = NULL, i;
> + bool flush_tlb;
>
> if (!args->n_devices) {
> pr_debug("Device IDs array empty\n");
> @@ -1384,16 +1385,19 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
> }
> args->n_success = i+1;
> }
> - mutex_unlock(&p->mutex);
>
> - if (kfd_flush_tlb_after_unmap(pdd->dev)) {
> + flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev);
> + if (flush_tlb) {
> err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
> (struct kgd_mem *) mem, true);
> if (err) {
> pr_debug("Sync memory failed, wait interrupted by user signal\n");
> goto sync_memory_failed;
> }
> + }
> + mutex_unlock(&p->mutex);
>
> + if (flush_tlb) {
> /* Flush TLBs after waiting for the page table updates to complete */
> for (i = 0; i < args->n_devices; i++) {
> peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
> @@ -1409,9 +1413,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
> bind_process_to_device_failed:
> get_mem_obj_from_handle_failed:
> unmap_memory_from_gpu_failed:
> +sync_memory_failed:
> mutex_unlock(&p->mutex);
> copy_from_user_failed:
> -sync_memory_failed:
> kfree(devices_arr);
> return err;
> }