2022-02-18 19:47:30

by Tom Rix

[permalink] [raw]
Subject: [PATCH] drm/amdkfd: rework criu_restore_bos error handling

From: Tom Rix <[email protected]>

Clang static analysis reports this problem
kfd_chardev.c:2327:2: warning: 1st function call argument
is an uninitialized value
kvfree(bo_privs);
^~~~~~~~~~~~~~~~

If the copy_from_user(bo_buckets, ...) call fails, there is a jump to
the generic error handler at the exit: label, where bo_privs has not
yet been assigned. The freeing of bo_privs and unwinding of the
dmabuf_fd loop do not need to be done in that case.

Add some specific labels for the early failures.
Reorder the frees to be the reverse of their allocs.

Move the initialization of 'i' back into the loop.
The problem with the early frees predates the loop
unwinding problem.

Fixes: 73fa13b6a511 ("drm/amdkfd: CRIU Implement KFD restore ioctl")
Signed-off-by: Tom Rix <[email protected]>
---
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 965af2a08bc0..1d5f41ac3832 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2102,7 +2102,7 @@ static int criu_restore_bos(struct kfd_process *p,
const bool criu_resume = true;
bool flush_tlbs = false;
int ret = 0, j = 0;
- uint32_t i = 0;
+ uint32_t i;

if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
return -EINVAL;
@@ -2119,13 +2119,13 @@ static int criu_restore_bos(struct kfd_process *p,
if (ret) {
pr_err("Failed to copy BOs information from user\n");
ret = -EFAULT;
- goto exit;
+ goto free_buckets;
}

bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
if (!bo_privs) {
ret = -ENOMEM;
- goto exit;
+ goto free_buckets;
}

ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
@@ -2133,12 +2133,12 @@ static int criu_restore_bos(struct kfd_process *p,
if (ret) {
pr_err("Failed to copy BOs information from user\n");
ret = -EFAULT;
- goto exit;
+ goto free_privs;
}
*priv_offset += args->num_bos * sizeof(*bo_privs);

/* Create and map new BOs */
- for (; i < args->num_bos; i++) {
+ for (i = 0; i < args->num_bos; i++) {
struct kfd_criu_bo_bucket *bo_bucket;
struct kfd_criu_bo_priv_data *bo_priv;
struct kfd_dev *dev;
@@ -2323,8 +2323,11 @@ static int criu_restore_bos(struct kfd_process *p,
if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
close_fd(bo_buckets[i].dmabuf_fd);
}
- kvfree(bo_buckets);
+free_privs:
kvfree(bo_privs);
+free_buckets:
+ kvfree(bo_buckets);
+
return ret;
}

--
2.26.3


2022-02-19 17:38:46

by Felix Kuehling

[permalink] [raw]
Subject: Re: [PATCH] drm/amdkfd: rework criu_restore_bos error handling


Am 2022-02-18 um 21:34 schrieb Tom Rix:
>
> On 2/18/22 10:35 AM, Felix Kuehling wrote:
>> Am 2022-02-18 um 12:39 schrieb [email protected]:
>>> From: Tom Rix <[email protected]>
>>>
>>> Clang static analysis reports this problem
>>> kfd_chardev.c:2327:2: warning: 1st function call argument
>>>    is an uninitialized value
>>>    kvfree(bo_privs);
>>>    ^~~~~~~~~~~~~~~~
>>>
>>> If the copy_from_users(bo_buckets, ...) fails, there is a jump to
>>> the generic error handler at exit:.  The freeing of bo_privs and
>>> unwinding of the dmabuf_fd loop do not need to be done.
>>>
>>> Add some specific labels for the early failures.
>>> Reorder the frees to be the reverse of their allocs.
>>>
>>> Move the initialize of 'i' back to the loop.
>>> The problem with the early frees predates the loop
>>> unwinding problem.
>>
>> I think the existing error handling strategy in this function is
>> fine. Having only one exit label avoids potential issues when using
>> the wrong label. Freeing NULL pointers is not a problem. The loop
>> becomes a noop if i==0 (this was fixed by you in a previous patch).
>> The only real problem I see is that bo_privs is not initialized. So
>> this should really be a one-line or maybe two-line fix:
>>
>>     struct kfd_criu_bo_bucket *bo_buckets = NULL;
>>     struct kfd_criu_bo_priv_data *bo_privs = NULL;
>
> This is the other way I considered to fix the problem. So it will work.

OK. I have already submitted this version to amd-staging-drm-next. Thank
you for reporting the problem.

Regards,
  Felix


>
> Tom
>
>>
>> Regards,
>>   Felix
>>
>>
>>>
>>> Fixes: 73fa13b6a511 ("drm/amdkfd: CRIU Implement KFD restore ioctl")
>>> Signed-off-by: Tom Rix <[email protected]>
>>> ---
>>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 +++++++++------
>>>   1 file changed, 9 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> index 965af2a08bc0..1d5f41ac3832 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> @@ -2102,7 +2102,7 @@ static int criu_restore_bos(struct kfd_process
>>> *p,
>>>       const bool criu_resume = true;
>>>       bool flush_tlbs = false;
>>>       int ret = 0, j = 0;
>>> -    uint32_t i = 0;
>>> +    uint32_t i;
>>>         if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) >
>>> max_priv_data_size)
>>>           return -EINVAL;
>>> @@ -2119,13 +2119,13 @@ static int criu_restore_bos(struct
>>> kfd_process *p,
>>>       if (ret) {
>>>           pr_err("Failed to copy BOs information from user\n");
>>>           ret = -EFAULT;
>>> -        goto exit;
>>> +        goto free_buckets;
>>>       }
>>>         bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs),
>>> GFP_KERNEL);
>>>       if (!bo_privs) {
>>>           ret = -ENOMEM;
>>> -        goto exit;
>>> +        goto free_buckets;
>>>       }
>>>         ret = copy_from_user(bo_privs, (void __user
>>> *)args->priv_data + *priv_offset,
>>> @@ -2133,12 +2133,12 @@ static int criu_restore_bos(struct
>>> kfd_process *p,
>>>       if (ret) {
>>>           pr_err("Failed to copy BOs information from user\n");
>>>           ret = -EFAULT;
>>> -        goto exit;
>>> +        goto free_privs;
>>>       }
>>>       *priv_offset += args->num_bos * sizeof(*bo_privs);
>>>         /* Create and map new BOs */
>>> -    for (; i < args->num_bos; i++) {
>>> +    for (i = 0; i < args->num_bos; i++) {
>>>           struct kfd_criu_bo_bucket *bo_bucket;
>>>           struct kfd_criu_bo_priv_data *bo_priv;
>>>           struct kfd_dev *dev;
>>> @@ -2323,8 +2323,11 @@ static int criu_restore_bos(struct
>>> kfd_process *p,
>>>           if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
>>>               close_fd(bo_buckets[i].dmabuf_fd);
>>>       }
>>> -    kvfree(bo_buckets);
>>> +free_privs:
>>>       kvfree(bo_privs);
>>> +free_buckets:
>>> +    kvfree(bo_buckets);
>>> +
>>>       return ret;
>>>   }
>>
>

2022-02-20 10:08:58

by Felix Kuehling

[permalink] [raw]
Subject: Re: [PATCH] drm/amdkfd: rework criu_restore_bos error handling

Am 2022-02-18 um 12:39 schrieb [email protected]:
> From: Tom Rix <[email protected]>
>
> Clang static analysis reports this problem
> kfd_chardev.c:2327:2: warning: 1st function call argument
> is an uninitialized value
> kvfree(bo_privs);
> ^~~~~~~~~~~~~~~~
>
> If the copy_from_users(bo_buckets, ...) fails, there is a jump to
> the generic error handler at exit:. The freeing of bo_privs and
> unwinding of the dmabuf_fd loop do not need to be done.
>
> Add some specific labels for the early failures.
> Reorder the frees to be the reverse of their allocs.
>
> Move the initialize of 'i' back to the loop.
> The problem with the early frees predates the loop
> unwinding problem.

I think the existing error handling strategy in this function is fine.
Having only one exit label avoids potential issues when using the wrong
label. Freeing NULL pointers is not a problem. The loop becomes a noop
if i==0 (this was fixed by you in a previous patch). The only real
problem I see is that bo_privs is not initialized. So this should really
be a one-line or maybe two-line fix:

struct kfd_criu_bo_bucket *bo_buckets = NULL;
struct kfd_criu_bo_priv_data *bo_privs = NULL;

Regards,
  Felix


>
> Fixes: 73fa13b6a511 ("drm/amdkfd: CRIU Implement KFD restore ioctl")
> Signed-off-by: Tom Rix <[email protected]>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 +++++++++------
> 1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 965af2a08bc0..1d5f41ac3832 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2102,7 +2102,7 @@ static int criu_restore_bos(struct kfd_process *p,
> const bool criu_resume = true;
> bool flush_tlbs = false;
> int ret = 0, j = 0;
> - uint32_t i = 0;
> + uint32_t i;
>
> if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
> return -EINVAL;
> @@ -2119,13 +2119,13 @@ static int criu_restore_bos(struct kfd_process *p,
> if (ret) {
> pr_err("Failed to copy BOs information from user\n");
> ret = -EFAULT;
> - goto exit;
> + goto free_buckets;
> }
>
> bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
> if (!bo_privs) {
> ret = -ENOMEM;
> - goto exit;
> + goto free_buckets;
> }
>
> ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
> @@ -2133,12 +2133,12 @@ static int criu_restore_bos(struct kfd_process *p,
> if (ret) {
> pr_err("Failed to copy BOs information from user\n");
> ret = -EFAULT;
> - goto exit;
> + goto free_privs;
> }
> *priv_offset += args->num_bos * sizeof(*bo_privs);
>
> /* Create and map new BOs */
> - for (; i < args->num_bos; i++) {
> + for (i = 0; i < args->num_bos; i++) {
> struct kfd_criu_bo_bucket *bo_bucket;
> struct kfd_criu_bo_priv_data *bo_priv;
> struct kfd_dev *dev;
> @@ -2323,8 +2323,11 @@ static int criu_restore_bos(struct kfd_process *p,
> if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
> close_fd(bo_buckets[i].dmabuf_fd);
> }
> - kvfree(bo_buckets);
> +free_privs:
> kvfree(bo_privs);
> +free_buckets:
> + kvfree(bo_buckets);
> +
> return ret;
> }
>

2022-02-20 12:00:09

by Tom Rix

[permalink] [raw]
Subject: Re: [PATCH] drm/amdkfd: rework criu_restore_bos error handling


On 2/18/22 10:35 AM, Felix Kuehling wrote:
> Am 2022-02-18 um 12:39 schrieb [email protected]:
>> From: Tom Rix <[email protected]>
>>
>> Clang static analysis reports this problem
>> kfd_chardev.c:2327:2: warning: 1st function call argument
>>    is an uninitialized value
>>    kvfree(bo_privs);
>>    ^~~~~~~~~~~~~~~~
>>
>> If the copy_from_users(bo_buckets, ...) fails, there is a jump to
>> the generic error handler at exit:.  The freeing of bo_privs and
>> unwinding of the dmabuf_fd loop do not need to be done.
>>
>> Add some specific labels for the early failures.
>> Reorder the frees to be the reverse of their allocs.
>>
>> Move the initialize of 'i' back to the loop.
>> The problem with the early frees predates the loop
>> unwinding problem.
>
> I think the existing error handling strategy in this function is fine.
> Having only one exit label avoids potential issues when using the
> wrong label. Freeing NULL pointers is not a problem. The loop becomes
> a noop if i==0 (this was fixed by you in a previous patch). The only
> real problem I see is that bo_privs is not initialized. So this should
> really be a one-line or maybe two-line fix:
>
>     struct kfd_criu_bo_bucket *bo_buckets = NULL;
>     struct kfd_criu_bo_priv_data *bo_privs = NULL;

This is the other way I considered to fix the problem. So it will work.

Tom

>
> Regards,
>   Felix
>
>
>>
>> Fixes: 73fa13b6a511 ("drm/amdkfd: CRIU Implement KFD restore ioctl")
>> Signed-off-by: Tom Rix <[email protected]>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 +++++++++------
>>   1 file changed, 9 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 965af2a08bc0..1d5f41ac3832 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -2102,7 +2102,7 @@ static int criu_restore_bos(struct kfd_process *p,
>>       const bool criu_resume = true;
>>       bool flush_tlbs = false;
>>       int ret = 0, j = 0;
>> -    uint32_t i = 0;
>> +    uint32_t i;
>>         if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) >
>> max_priv_data_size)
>>           return -EINVAL;
>> @@ -2119,13 +2119,13 @@ static int criu_restore_bos(struct
>> kfd_process *p,
>>       if (ret) {
>>           pr_err("Failed to copy BOs information from user\n");
>>           ret = -EFAULT;
>> -        goto exit;
>> +        goto free_buckets;
>>       }
>>         bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs),
>> GFP_KERNEL);
>>       if (!bo_privs) {
>>           ret = -ENOMEM;
>> -        goto exit;
>> +        goto free_buckets;
>>       }
>>         ret = copy_from_user(bo_privs, (void __user *)args->priv_data
>> + *priv_offset,
>> @@ -2133,12 +2133,12 @@ static int criu_restore_bos(struct
>> kfd_process *p,
>>       if (ret) {
>>           pr_err("Failed to copy BOs information from user\n");
>>           ret = -EFAULT;
>> -        goto exit;
>> +        goto free_privs;
>>       }
>>       *priv_offset += args->num_bos * sizeof(*bo_privs);
>>         /* Create and map new BOs */
>> -    for (; i < args->num_bos; i++) {
>> +    for (i = 0; i < args->num_bos; i++) {
>>           struct kfd_criu_bo_bucket *bo_bucket;
>>           struct kfd_criu_bo_priv_data *bo_priv;
>>           struct kfd_dev *dev;
>> @@ -2323,8 +2323,11 @@ static int criu_restore_bos(struct kfd_process
>> *p,
>>           if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
>>               close_fd(bo_buckets[i].dmabuf_fd);
>>       }
>> -    kvfree(bo_buckets);
>> +free_privs:
>>       kvfree(bo_privs);
>> +free_buckets:
>> +    kvfree(bo_buckets);
>> +
>>       return ret;
>>   }
>

2022-02-21 09:28:45

by Christian König

[permalink] [raw]
Subject: Re: [PATCH] drm/amdkfd: rework criu_restore_bos error handling

Am 18.02.22 um 19:35 schrieb Felix Kuehling:
> Am 2022-02-18 um 12:39 schrieb [email protected]:
>> From: Tom Rix <[email protected]>
>>
>> Clang static analysis reports this problem
>> kfd_chardev.c:2327:2: warning: 1st function call argument
>>    is an uninitialized value
>>    kvfree(bo_privs);
>>    ^~~~~~~~~~~~~~~~
>>
>> If the copy_from_users(bo_buckets, ...) fails, there is a jump to
>> the generic error handler at exit:.  The freeing of bo_privs and
>> unwinding of the dmabuf_fd loop do not need to be done.
>>
>> Add some specific labels for the early failures.
>> Reorder the frees to be the reverse of their allocs.
>>
>> Move the initialize of 'i' back to the loop.
>> The problem with the early frees predates the loop
>> unwinding problem.
>
> I think the existing error handling strategy in this function is fine.
> Having only one exit label avoids potential issues when using the
> wrong label. Freeing NULL pointers is not a problem. The loop becomes
> a noop if i==0 (this was fixed by you in a previous patch). The only
> real problem I see is that bo_privs is not initialized. So this should
> really be a one-line or maybe two-line fix:
>
>     struct kfd_criu_bo_bucket *bo_buckets = NULL;
>     struct kfd_criu_bo_priv_data *bo_privs = NULL;

That is usually seen as very bad practice and is strongly
discouraged.

Using multiple error handling labels is preferred because it reflects
the cleanup procedure you need to do for each step.

In other words one error label and one kfree() for each kmalloc() (or
other function which can go wrong) is the desired way to go.

Regards,
Christian.

>
> Regards,
>   Felix
>
>
>>
>> Fixes: 73fa13b6a511 ("drm/amdkfd: CRIU Implement KFD restore ioctl")
>> Signed-off-by: Tom Rix <[email protected]>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 +++++++++------
>>   1 file changed, 9 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 965af2a08bc0..1d5f41ac3832 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -2102,7 +2102,7 @@ static int criu_restore_bos(struct kfd_process *p,
>>       const bool criu_resume = true;
>>       bool flush_tlbs = false;
>>       int ret = 0, j = 0;
>> -    uint32_t i = 0;
>> +    uint32_t i;
>>         if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) >
>> max_priv_data_size)
>>           return -EINVAL;
>> @@ -2119,13 +2119,13 @@ static int criu_restore_bos(struct
>> kfd_process *p,
>>       if (ret) {
>>           pr_err("Failed to copy BOs information from user\n");
>>           ret = -EFAULT;
>> -        goto exit;
>> +        goto free_buckets;
>>       }
>>         bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs),
>> GFP_KERNEL);
>>       if (!bo_privs) {
>>           ret = -ENOMEM;
>> -        goto exit;
>> +        goto free_buckets;
>>       }
>>         ret = copy_from_user(bo_privs, (void __user *)args->priv_data
>> + *priv_offset,
>> @@ -2133,12 +2133,12 @@ static int criu_restore_bos(struct
>> kfd_process *p,
>>       if (ret) {
>>           pr_err("Failed to copy BOs information from user\n");
>>           ret = -EFAULT;
>> -        goto exit;
>> +        goto free_privs;
>>       }
>>       *priv_offset += args->num_bos * sizeof(*bo_privs);
>>         /* Create and map new BOs */
>> -    for (; i < args->num_bos; i++) {
>> +    for (i = 0; i < args->num_bos; i++) {
>>           struct kfd_criu_bo_bucket *bo_bucket;
>>           struct kfd_criu_bo_priv_data *bo_priv;
>>           struct kfd_dev *dev;
>> @@ -2323,8 +2323,11 @@ static int criu_restore_bos(struct kfd_process
>> *p,
>>           if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
>>               close_fd(bo_buckets[i].dmabuf_fd);
>>       }
>> -    kvfree(bo_buckets);
>> +free_privs:
>>       kvfree(bo_privs);
>> +free_buckets:
>> +    kvfree(bo_buckets);
>> +
>>       return ret;
>>   }