2022-09-09 17:47:31

by Arvind Yadav

[permalink] [raw]
Subject: [PATCH v3 0/6] dma-buf: Check status of enable-signaling bit on debug

Fence signaling must be enabled to make sure that
the dma_fence_is_signaled() function ever returns true.
Since drivers and implementations sometimes mess this up,
this ensures correct behaviour when DEBUG_WW_MUTEX_SLOWPATH
is used during debugging.
This should make any implementation bugs resulting in not
signaled fences much more obvious.


Arvind Yadav (6):
[PATCH v3 1/6] dma-buf: Remove the signaled bit status check
[PATCH v3 2/6] dma-buf: set signaling bit for the stub fence
[PATCH v3 3/6] dma-buf: Enable signaling on fence for selftests
[PATCH v3 4/6] drm/amdgpu: Enable signaling on fence.
[PATCH v3 5/6] drm/sched: Use parent fence instead of finished
[PATCH v3 6/6] dma-buf: Check status of enable-signaling bit on debug

drivers/dma-buf/dma-fence.c | 7 ++++---
drivers/dma-buf/st-dma-fence-chain.c | 4 ++++
drivers/dma-buf/st-dma-fence-unwrap.c | 22 ++++++++++++++++++++++
drivers/dma-buf/st-dma-fence.c | 16 ++++++++++++++++
drivers/dma-buf/st-dma-resv.c | 10 ++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 ++
drivers/gpu/drm/scheduler/sched_main.c | 4 ++--
include/linux/dma-fence.h | 5 +++++
8 files changed, 65 insertions(+), 5 deletions(-)

--
2.25.1


2022-09-09 17:53:22

by Arvind Yadav

[permalink] [raw]
Subject: [PATCH v3 3/6] dma-buf: Enable signaling on fence for selftests

Here's enabling software signaling on fence for selftest.

Signed-off-by: Arvind Yadav <[email protected]>
---

Changes in v1 :
1- Addressing Christian's comment to remove unnecessary callback.
2- Replacing CONFIG_DEBUG_WW_MUTEX_SLOWPATH instead of CONFIG_DEBUG_FS.
3- The version of this patch is also changed and previously
it was [PATCH 4/4]

Changes in v2 :
1- #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH removed

---
drivers/dma-buf/st-dma-fence-chain.c | 4 ++++
drivers/dma-buf/st-dma-fence-unwrap.c | 22 ++++++++++++++++++++++
drivers/dma-buf/st-dma-fence.c | 16 ++++++++++++++++
drivers/dma-buf/st-dma-resv.c | 10 ++++++++++
4 files changed, 52 insertions(+)

diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
index 8ce1ea59d31b..0a9b099d0518 100644
--- a/drivers/dma-buf/st-dma-fence-chain.c
+++ b/drivers/dma-buf/st-dma-fence-chain.c
@@ -87,6 +87,8 @@ static int sanitycheck(void *arg)
if (!chain)
err = -ENOMEM;

+ dma_fence_enable_sw_signaling(chain);
+
dma_fence_signal(f);
dma_fence_put(f);

@@ -143,6 +145,8 @@ static int fence_chains_init(struct fence_chains *fc, unsigned int count,
}

fc->tail = fc->chains[i];
+
+ dma_fence_enable_sw_signaling(fc->chains[i]);
}

fc->chain_length = i;
diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c
index 4105d5ea8dde..f0cee984b6c7 100644
--- a/drivers/dma-buf/st-dma-fence-unwrap.c
+++ b/drivers/dma-buf/st-dma-fence-unwrap.c
@@ -102,6 +102,8 @@ static int sanitycheck(void *arg)
if (!f)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f);
+
array = mock_array(1, f);
if (!array)
return -ENOMEM;
@@ -124,12 +126,16 @@ static int unwrap_array(void *arg)
if (!f1)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f1);
+
f2 = mock_fence();
if (!f2) {
dma_fence_put(f1);
return -ENOMEM;
}

+ dma_fence_enable_sw_signaling(f2);
+
array = mock_array(2, f1, f2);
if (!array)
return -ENOMEM;
@@ -164,12 +170,16 @@ static int unwrap_chain(void *arg)
if (!f1)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f1);
+
f2 = mock_fence();
if (!f2) {
dma_fence_put(f1);
return -ENOMEM;
}

+ dma_fence_enable_sw_signaling(f2);
+
chain = mock_chain(f1, f2);
if (!chain)
return -ENOMEM;
@@ -204,12 +214,16 @@ static int unwrap_chain_array(void *arg)
if (!f1)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f1);
+
f2 = mock_fence();
if (!f2) {
dma_fence_put(f1);
return -ENOMEM;
}

+ dma_fence_enable_sw_signaling(f2);
+
array = mock_array(2, f1, f2);
if (!array)
return -ENOMEM;
@@ -248,12 +262,16 @@ static int unwrap_merge(void *arg)
if (!f1)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f1);
+
f2 = mock_fence();
if (!f2) {
err = -ENOMEM;
goto error_put_f1;
}

+ dma_fence_enable_sw_signaling(f2);
+
f3 = dma_fence_unwrap_merge(f1, f2);
if (!f3) {
err = -ENOMEM;
@@ -296,10 +314,14 @@ static int unwrap_merge_complex(void *arg)
if (!f1)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f1);
+
f2 = mock_fence();
if (!f2)
goto error_put_f1;

+ dma_fence_enable_sw_signaling(f2);
+
f3 = dma_fence_unwrap_merge(f1, f2);
if (!f3)
goto error_put_f2;
diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c
index c8a12d7ad71a..fb6e0a6ae2c9 100644
--- a/drivers/dma-buf/st-dma-fence.c
+++ b/drivers/dma-buf/st-dma-fence.c
@@ -102,6 +102,8 @@ static int sanitycheck(void *arg)
if (!f)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f);
+
dma_fence_signal(f);
dma_fence_put(f);

@@ -117,6 +119,8 @@ static int test_signaling(void *arg)
if (!f)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f);
+
if (dma_fence_is_signaled(f)) {
pr_err("Fence unexpectedly signaled on creation\n");
goto err_free;
@@ -190,6 +194,8 @@ static int test_late_add_callback(void *arg)
if (!f)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f);
+
dma_fence_signal(f);

if (!dma_fence_add_callback(f, &cb.cb, simple_callback)) {
@@ -282,6 +288,8 @@ static int test_status(void *arg)
if (!f)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f);
+
if (dma_fence_get_status(f)) {
pr_err("Fence unexpectedly has signaled status on creation\n");
goto err_free;
@@ -308,6 +316,8 @@ static int test_error(void *arg)
if (!f)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f);
+
dma_fence_set_error(f, -EIO);

if (dma_fence_get_status(f)) {
@@ -337,6 +347,8 @@ static int test_wait(void *arg)
if (!f)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f);
+
if (dma_fence_wait_timeout(f, false, 0) != -ETIME) {
pr_err("Wait reported complete before being signaled\n");
goto err_free;
@@ -379,6 +391,8 @@ static int test_wait_timeout(void *arg)
if (!wt.f)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(wt.f);
+
if (dma_fence_wait_timeout(wt.f, false, 1) != -ETIME) {
pr_err("Wait reported complete before being signaled\n");
goto err_free;
@@ -458,6 +472,8 @@ static int thread_signal_callback(void *arg)
break;
}

+ dma_fence_enable_sw_signaling(f1);
+
rcu_assign_pointer(t->fences[t->id], f1);
smp_wmb();

diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
index 813779e3c9be..15dbea1462ed 100644
--- a/drivers/dma-buf/st-dma-resv.c
+++ b/drivers/dma-buf/st-dma-resv.c
@@ -45,6 +45,8 @@ static int sanitycheck(void *arg)
if (!f)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f);
+
dma_fence_signal(f);
dma_fence_put(f);

@@ -69,6 +71,8 @@ static int test_signaling(void *arg)
if (!f)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f);
+
dma_resv_init(&resv);
r = dma_resv_lock(&resv, NULL);
if (r) {
@@ -114,6 +118,8 @@ static int test_for_each(void *arg)
if (!f)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f);
+
dma_resv_init(&resv);
r = dma_resv_lock(&resv, NULL);
if (r) {
@@ -173,6 +179,8 @@ static int test_for_each_unlocked(void *arg)
if (!f)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f);
+
dma_resv_init(&resv);
r = dma_resv_lock(&resv, NULL);
if (r) {
@@ -244,6 +252,8 @@ static int test_get_fences(void *arg)
if (!f)
return -ENOMEM;

+ dma_fence_enable_sw_signaling(f);
+
dma_resv_init(&resv);
r = dma_resv_lock(&resv, NULL);
if (r) {
--
2.25.1

2022-09-09 17:53:32

by Arvind Yadav

[permalink] [raw]
Subject: [PATCH v3 4/6] drm/amdgpu: Enable signaling on fence.

Here's enabling software signaling on fence because
amdgpu_ctx_add_fence() is checking the status of fence
and emits warning.

Signed-off-by: Arvind Yadav <[email protected]>
---

Changes in v1, v2: This new patch was not part of previous series.

---
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index afe22f83d4a6..21221d705588 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -730,6 +730,8 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,

dma_fence_get(fence);

+ dma_fence_enable_sw_signaling(fence);
+
spin_lock(&ctx->ring_lock);
centity->fences[idx] = fence;
centity->sequence++;
--
2.25.1

2022-09-09 17:54:14

by Arvind Yadav

[permalink] [raw]
Subject: [PATCH v3 2/6] dma-buf: set signaling bit for the stub fence

Here's setting software signaling bit for the stub fence
which is always signaled. If this fence signaling bit is
not set then the AMD GPU scheduler will cause a GPU reset
due to a GPU scheduler cleanup activity timeout.

Signed-off-by: Arvind Yadav <[email protected]>
---

Changes in v1 :
1- Addressing Christian's comment to remove unnecessary callback.
2- Replacing CONFIG_DEBUG_WW_MUTEX_SLOWPATH instead of CONFIG_DEBUG_FS.
3- The version of this patch is also changed and previously
it was [PATCH 3/4]

Changes in v2 :
1 - perviously using __dma_fence_enable_signaling() for enable
signaling.
2 - #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH removed

---
drivers/dma-buf/dma-fence.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 64c99739ad23..bead1a6e9f59 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -136,6 +136,10 @@ struct dma_fence *dma_fence_get_stub(void)
&dma_fence_stub_ops,
&dma_fence_stub_lock,
0, 0);
+
+ set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+ &dma_fence_stub.flags);
+
dma_fence_signal_locked(&dma_fence_stub);
}
spin_unlock(&dma_fence_stub_lock);
--
2.25.1

2022-09-09 18:15:28

by Arvind Yadav

[permalink] [raw]
Subject: [PATCH v3 6/6] dma-buf: Check status of enable-signaling bit on debug

Fence signaling must be enabled to make sure that
the dma_fence_is_signaled() function ever returns true.
Since drivers and implementations sometimes mess this up,
this ensures correct behaviour when DEBUG_WW_MUTEX_SLOWPATH
is used during debugging.
This should make any implementation bugs resulting in not
signaled fences much more obvious.

Signed-off-by: Arvind Yadav <[email protected]>
---

Changes in v1,v2 :
1- Addressing Christian's comment to replace
CONFIG_DEBUG_WW_MUTEX_SLOWPATH instead of CONFIG_DEBUG_FS.
2- As per Christian's comment moving this patch at last so
The version of this patch is also changed and previously
it was [PATCH 1/4]

---
include/linux/dma-fence.h | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 775cdc0b4f24..ba1ddc14c5d4 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -428,6 +428,11 @@ dma_fence_is_signaled_locked(struct dma_fence *fence)
static inline bool
dma_fence_is_signaled(struct dma_fence *fence)
{
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+ if (!test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags))
+ return false;
+#endif
+
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
return true;

--
2.25.1

2022-09-09 18:18:09

by Arvind Yadav

[permalink] [raw]
Subject: [PATCH v3 5/6] drm/sched: Use parent fence instead of finished

Using the parent fence instead of the finished fence
to get the job status. This change is to avoid GPU
scheduler timeout error which can cause GPU reset.

Signed-off-by: Arvind Yadav <[email protected]>
---

changes in v1,v2 - Enable signaling for finished fence in sche_main()
is removed

---
drivers/gpu/drm/scheduler/sched_main.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index e0ab14e0fb6b..2ac28ad11432 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -829,7 +829,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
job = list_first_entry_or_null(&sched->pending_list,
struct drm_sched_job, list);

- if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
+ if (job && dma_fence_is_signaled(job->s_fence->parent)) {
/* remove job from pending_list */
list_del_init(&job->list);

@@ -841,7 +841,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)

if (next) {
next->s_fence->scheduled.timestamp =
- job->s_fence->finished.timestamp;
+ job->s_fence->parent->timestamp;
/* start TO timer for next job */
drm_sched_start_timeout(sched);
}
--
2.25.1

2022-09-09 18:29:10

by Andrey Grodzovsky

[permalink] [raw]
Subject: Re: [PATCH v3 5/6] drm/sched: Use parent fence instead of finished

What exactly is the scenario which this patch fixes in more detail please  ?

Andrey

On 2022-09-09 13:08, Arvind Yadav wrote:
> Using the parent fence instead of the finished fence
> to get the job status. This change is to avoid GPU
> scheduler timeout error which can cause GPU reset.
>
> Signed-off-by: Arvind Yadav <[email protected]>
> ---
>
> changes in v1,v2 - Enable signaling for finished fence in sche_main()
> is removed
>
> ---
> drivers/gpu/drm/scheduler/sched_main.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index e0ab14e0fb6b..2ac28ad11432 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -829,7 +829,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
> job = list_first_entry_or_null(&sched->pending_list,
> struct drm_sched_job, list);
>
> - if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
> + if (job && dma_fence_is_signaled(job->s_fence->parent)) {
> /* remove job from pending_list */
> list_del_init(&job->list);
>
> @@ -841,7 +841,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
>
> if (next) {
> next->s_fence->scheduled.timestamp =
> - job->s_fence->finished.timestamp;
> + job->s_fence->parent->timestamp;
> /* start TO timer for next job */
> drm_sched_start_timeout(sched);
> }

2022-09-09 20:53:10

by Yadav, Arvind

[permalink] [raw]
Subject: Re: [PATCH v3 5/6] drm/sched: Use parent fence instead of finished


On 9/9/2022 11:02 PM, Andrey Grodzovsky wrote:
> What exactly is the scenario which this patch fixes in more detail
> please  ?
>
GPU reset issue started after adding [PATCH 6/6].

Root cause -> In drm_sched_get_cleanup_job(), We use the finished fence
status bit to check the job status dma_fence_is_signaled(). If a job is
signaled (DMA_FENCE_FLAG_SIGNALED_BIT is set), then we cancel the reset
worker thread.

After applying [patch 6] now we are checking enable signaling in
dma_fence_is_signaled() by checking DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT
bit. but signaling is not enabled for the finished fence. As a result,
dma_fence_is_signaled() always returns false, and
drm_sched_get_cleanup_job() will not cancel the reset worker thread,
resulting in the GPU reset.

To Fix the above issue  Christian suggested that we can use
parent(hardware) fence instead of finished fence because signaling
enabled by the calling of dma_fence_add_callback() for parent fence. As
a result, dma_fence_is_signaled() will return the correct fence status
and reset worker thread can be cancelled in drm_sched_get_cleanup_job().

~arvind

> Andrey
>
> On 2022-09-09 13:08, Arvind Yadav wrote:
>> Using the parent fence instead of the finished fence
>> to get the job status. This change is to avoid GPU
>> scheduler timeout error which can cause GPU reset.
>>
>> Signed-off-by: Arvind Yadav <[email protected]>
>> ---
>>
>> changes in v1,v2 - Enable signaling for finished fence in sche_main()
>> is removed
>>
>> ---
>>   drivers/gpu/drm/scheduler/sched_main.c | 4 ++--
>>   1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>> b/drivers/gpu/drm/scheduler/sched_main.c
>> index e0ab14e0fb6b..2ac28ad11432 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -829,7 +829,7 @@ drm_sched_get_cleanup_job(struct
>> drm_gpu_scheduler *sched)
>>       job = list_first_entry_or_null(&sched->pending_list,
>>                          struct drm_sched_job, list);
>>   -    if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
>> +    if (job && dma_fence_is_signaled(job->s_fence->parent)) {
>>           /* remove job from pending_list */
>>           list_del_init(&job->list);
>>   @@ -841,7 +841,7 @@ drm_sched_get_cleanup_job(struct
>> drm_gpu_scheduler *sched)
>>             if (next) {
>>               next->s_fence->scheduled.timestamp =
>> -                job->s_fence->finished.timestamp;
>> +                job->s_fence->parent->timestamp;
>>               /* start TO timer for next job */
>>               drm_sched_start_timeout(sched);
>>           }

2022-09-09 21:12:46

by Andrey Grodzovsky

[permalink] [raw]
Subject: Re: [PATCH v3 5/6] drm/sched: Use parent fence instead of finished

Got it.

Reviewed-by: Andrey Grodzovsky <[email protected]>

Andrey

On 2022-09-09 16:30, Yadav, Arvind wrote:
>
> On 9/9/2022 11:02 PM, Andrey Grodzovsky wrote:
>> What exactly is the scenario which this patch fixes in more detail
>> please  ?
>>
> GPU reset issue started after adding [PATCH 6/6].
>
> Root cause -> In drm_sched_get_cleanup_job(), We use the finished
> fence status bit to check the job status dma_fence_is_signaled(). If a
> job is signaled (DMA_FENCE_FLAG_SIGNALED_BIT is set), then we cancel
> the reset worker thread.
>
> After applying [patch 6] now we are checking enable signaling in
> dma_fence_is_signaled() by checking DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT
> bit. but signaling is not enabled for the finished fence. As a result,
> dma_fence_is_signaled() always returns false, and
> drm_sched_get_cleanup_job() will not cancel the reset worker thread,
> resulting in the GPU reset.
>
> To Fix the above issue  Christian suggested that we can use
> parent(hardware) fence instead of finished fence because signaling
> enabled by the calling of dma_fence_add_callback() for parent fence.
> As a result, dma_fence_is_signaled() will return the correct fence
> status and reset worker thread can be cancelled in
> drm_sched_get_cleanup_job().
>
> ~arvind
>
>> Andrey
>>
>> On 2022-09-09 13:08, Arvind Yadav wrote:
>>> Using the parent fence instead of the finished fence
>>> to get the job status. This change is to avoid GPU
>>> scheduler timeout error which can cause GPU reset.
>>>
>>> Signed-off-by: Arvind Yadav <[email protected]>
>>> ---
>>>
>>> changes in v1,v2 - Enable signaling for finished fence in sche_main()
>>> is removed
>>>
>>> ---
>>>   drivers/gpu/drm/scheduler/sched_main.c | 4 ++--
>>>   1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>> index e0ab14e0fb6b..2ac28ad11432 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -829,7 +829,7 @@ drm_sched_get_cleanup_job(struct
>>> drm_gpu_scheduler *sched)
>>>       job = list_first_entry_or_null(&sched->pending_list,
>>>                          struct drm_sched_job, list);
>>>   -    if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
>>> +    if (job && dma_fence_is_signaled(job->s_fence->parent)) {
>>>           /* remove job from pending_list */
>>>           list_del_init(&job->list);
>>>   @@ -841,7 +841,7 @@ drm_sched_get_cleanup_job(struct
>>> drm_gpu_scheduler *sched)
>>>             if (next) {
>>>               next->s_fence->scheduled.timestamp =
>>> -                job->s_fence->finished.timestamp;
>>> +                job->s_fence->parent->timestamp;
>>>               /* start TO timer for next job */
>>>               drm_sched_start_timeout(sched);
>>>           }

2022-09-12 07:17:02

by Christian König

[permalink] [raw]
Subject: Re: [PATCH v3 3/6] dma-buf: Enable signaling on fence for selftests

Am 09.09.22 um 19:08 schrieb Arvind Yadav:
> Here's enabling software signaling on fence for selftest.
>
> Signed-off-by: Arvind Yadav <[email protected]>

Reviewed-by: Christian König <[email protected]>

> ---
>
> Changes in v1 :
> 1- Addressing Christian's comment to remove unnecessary callback.
> 2- Replacing CONFIG_DEBUG_WW_MUTEX_SLOWPATH instead of CONFIG_DEBUG_FS.
> 3- The version of this patch is also changed and previously
> it was [PATCH 4/4]
>
> Changes in v2 :
> 1- #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH removed
>
> ---
> drivers/dma-buf/st-dma-fence-chain.c | 4 ++++
> drivers/dma-buf/st-dma-fence-unwrap.c | 22 ++++++++++++++++++++++
> drivers/dma-buf/st-dma-fence.c | 16 ++++++++++++++++
> drivers/dma-buf/st-dma-resv.c | 10 ++++++++++
> 4 files changed, 52 insertions(+)
>
> diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
> index 8ce1ea59d31b..0a9b099d0518 100644
> --- a/drivers/dma-buf/st-dma-fence-chain.c
> +++ b/drivers/dma-buf/st-dma-fence-chain.c
> @@ -87,6 +87,8 @@ static int sanitycheck(void *arg)
> if (!chain)
> err = -ENOMEM;
>
> + dma_fence_enable_sw_signaling(chain);
> +
> dma_fence_signal(f);
> dma_fence_put(f);
>
> @@ -143,6 +145,8 @@ static int fence_chains_init(struct fence_chains *fc, unsigned int count,
> }
>
> fc->tail = fc->chains[i];
> +
> + dma_fence_enable_sw_signaling(fc->chains[i]);
> }
>
> fc->chain_length = i;
> diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c
> index 4105d5ea8dde..f0cee984b6c7 100644
> --- a/drivers/dma-buf/st-dma-fence-unwrap.c
> +++ b/drivers/dma-buf/st-dma-fence-unwrap.c
> @@ -102,6 +102,8 @@ static int sanitycheck(void *arg)
> if (!f)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f);
> +
> array = mock_array(1, f);
> if (!array)
> return -ENOMEM;
> @@ -124,12 +126,16 @@ static int unwrap_array(void *arg)
> if (!f1)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f1);
> +
> f2 = mock_fence();
> if (!f2) {
> dma_fence_put(f1);
> return -ENOMEM;
> }
>
> + dma_fence_enable_sw_signaling(f2);
> +
> array = mock_array(2, f1, f2);
> if (!array)
> return -ENOMEM;
> @@ -164,12 +170,16 @@ static int unwrap_chain(void *arg)
> if (!f1)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f1);
> +
> f2 = mock_fence();
> if (!f2) {
> dma_fence_put(f1);
> return -ENOMEM;
> }
>
> + dma_fence_enable_sw_signaling(f2);
> +
> chain = mock_chain(f1, f2);
> if (!chain)
> return -ENOMEM;
> @@ -204,12 +214,16 @@ static int unwrap_chain_array(void *arg)
> if (!f1)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f1);
> +
> f2 = mock_fence();
> if (!f2) {
> dma_fence_put(f1);
> return -ENOMEM;
> }
>
> + dma_fence_enable_sw_signaling(f2);
> +
> array = mock_array(2, f1, f2);
> if (!array)
> return -ENOMEM;
> @@ -248,12 +262,16 @@ static int unwrap_merge(void *arg)
> if (!f1)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f1);
> +
> f2 = mock_fence();
> if (!f2) {
> err = -ENOMEM;
> goto error_put_f1;
> }
>
> + dma_fence_enable_sw_signaling(f2);
> +
> f3 = dma_fence_unwrap_merge(f1, f2);
> if (!f3) {
> err = -ENOMEM;
> @@ -296,10 +314,14 @@ static int unwrap_merge_complex(void *arg)
> if (!f1)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f1);
> +
> f2 = mock_fence();
> if (!f2)
> goto error_put_f1;
>
> + dma_fence_enable_sw_signaling(f2);
> +
> f3 = dma_fence_unwrap_merge(f1, f2);
> if (!f3)
> goto error_put_f2;
> diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c
> index c8a12d7ad71a..fb6e0a6ae2c9 100644
> --- a/drivers/dma-buf/st-dma-fence.c
> +++ b/drivers/dma-buf/st-dma-fence.c
> @@ -102,6 +102,8 @@ static int sanitycheck(void *arg)
> if (!f)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f);
> +
> dma_fence_signal(f);
> dma_fence_put(f);
>
> @@ -117,6 +119,8 @@ static int test_signaling(void *arg)
> if (!f)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f);
> +
> if (dma_fence_is_signaled(f)) {
> pr_err("Fence unexpectedly signaled on creation\n");
> goto err_free;
> @@ -190,6 +194,8 @@ static int test_late_add_callback(void *arg)
> if (!f)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f);
> +
> dma_fence_signal(f);
>
> if (!dma_fence_add_callback(f, &cb.cb, simple_callback)) {
> @@ -282,6 +288,8 @@ static int test_status(void *arg)
> if (!f)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f);
> +
> if (dma_fence_get_status(f)) {
> pr_err("Fence unexpectedly has signaled status on creation\n");
> goto err_free;
> @@ -308,6 +316,8 @@ static int test_error(void *arg)
> if (!f)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f);
> +
> dma_fence_set_error(f, -EIO);
>
> if (dma_fence_get_status(f)) {
> @@ -337,6 +347,8 @@ static int test_wait(void *arg)
> if (!f)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f);
> +
> if (dma_fence_wait_timeout(f, false, 0) != -ETIME) {
> pr_err("Wait reported complete before being signaled\n");
> goto err_free;
> @@ -379,6 +391,8 @@ static int test_wait_timeout(void *arg)
> if (!wt.f)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(wt.f);
> +
> if (dma_fence_wait_timeout(wt.f, false, 1) != -ETIME) {
> pr_err("Wait reported complete before being signaled\n");
> goto err_free;
> @@ -458,6 +472,8 @@ static int thread_signal_callback(void *arg)
> break;
> }
>
> + dma_fence_enable_sw_signaling(f1);
> +
> rcu_assign_pointer(t->fences[t->id], f1);
> smp_wmb();
>
> diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
> index 813779e3c9be..15dbea1462ed 100644
> --- a/drivers/dma-buf/st-dma-resv.c
> +++ b/drivers/dma-buf/st-dma-resv.c
> @@ -45,6 +45,8 @@ static int sanitycheck(void *arg)
> if (!f)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f);
> +
> dma_fence_signal(f);
> dma_fence_put(f);
>
> @@ -69,6 +71,8 @@ static int test_signaling(void *arg)
> if (!f)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f);
> +
> dma_resv_init(&resv);
> r = dma_resv_lock(&resv, NULL);
> if (r) {
> @@ -114,6 +118,8 @@ static int test_for_each(void *arg)
> if (!f)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f);
> +
> dma_resv_init(&resv);
> r = dma_resv_lock(&resv, NULL);
> if (r) {
> @@ -173,6 +179,8 @@ static int test_for_each_unlocked(void *arg)
> if (!f)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f);
> +
> dma_resv_init(&resv);
> r = dma_resv_lock(&resv, NULL);
> if (r) {
> @@ -244,6 +252,8 @@ static int test_get_fences(void *arg)
> if (!f)
> return -ENOMEM;
>
> + dma_fence_enable_sw_signaling(f);
> +
> dma_resv_init(&resv);
> r = dma_resv_lock(&resv, NULL);
> if (r) {

2022-09-12 07:18:54

by Christian König

[permalink] [raw]
Subject: Re: [PATCH v3 2/6] dma-buf: set signaling bit for the stub fence

Am 09.09.22 um 19:08 schrieb Arvind Yadav:
> Here's setting software signaling bit for the stub fence
> which is always signaled. If this fence signaling bit is
> not set then the AMD GPU scheduler will cause a GPU reset
> due to a GPU scheduler cleanup activity timeout.
>
> Signed-off-by: Arvind Yadav <[email protected]>

Reviewed-by: Christian König <[email protected]>

> ---
>
> Changes in v1 :
> 1- Addressing Christian's comment to remove unnecessary callback.
> 2- Replacing CONFIG_DEBUG_WW_MUTEX_SLOWPATH instead of CONFIG_DEBUG_FS.
> 3- The version of this patch is also changed and previously
> it was [PATCH 3/4]
>
> Changes in v2 :
> 1 - perviously using __dma_fence_enable_signaling() for enable
> signaling.
> 2 - #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH removed
>
> ---
> drivers/dma-buf/dma-fence.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 64c99739ad23..bead1a6e9f59 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -136,6 +136,10 @@ struct dma_fence *dma_fence_get_stub(void)
> &dma_fence_stub_ops,
> &dma_fence_stub_lock,
> 0, 0);
> +
> + set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> + &dma_fence_stub.flags);
> +
> dma_fence_signal_locked(&dma_fence_stub);
> }
> spin_unlock(&dma_fence_stub_lock);

2022-09-12 09:10:58

by Christian König

[permalink] [raw]
Subject: Re: [PATCH v3 6/6] dma-buf: Check status of enable-signaling bit on debug

Am 09.09.22 um 19:08 schrieb Arvind Yadav:
> Fence signaling must be enabled to make sure that
> the dma_fence_is_signaled() function ever returns true.
> Since drivers and implementations sometimes mess this up,
> this ensures correct behaviour when DEBUG_WW_MUTEX_SLOWPATH
> is used during debugging.
> This should make any implementation bugs resulting in not
> signaled fences much more obvious.
>
> Signed-off-by: Arvind Yadav <[email protected]>
> ---
>
> Changes in v1,v2 :
> 1- Addressing Christian's comment to replace
> CONFIG_DEBUG_WW_MUTEX_SLOWPATH instead of CONFIG_DEBUG_FS.
> 2- As per Christian's comment moving this patch at last so
> The version of this patch is also changed and previously
> it was [PATCH 1/4]
>
> ---
> include/linux/dma-fence.h | 5 +++++
> 1 file changed, 5 insertions(+)
>
> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> index 775cdc0b4f24..ba1ddc14c5d4 100644
> --- a/include/linux/dma-fence.h
> +++ b/include/linux/dma-fence.h
> @@ -428,6 +428,11 @@ dma_fence_is_signaled_locked(struct dma_fence *fence)
> static inline bool
> dma_fence_is_signaled(struct dma_fence *fence)
> {
> +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH

As by review comment from Tvrtko Ursulin let's add a separate config
option for this into drivers/dma-buf/Kconfig

Thanks,
Christian.

> + if (!test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags))
> + return false;
> +#endif
> +
> if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
> return true;
>

2022-09-12 09:11:33

by Christian König

[permalink] [raw]
Subject: Re: [PATCH v3 4/6] drm/amdgpu: Enable signaling on fence.

Am 09.09.22 um 19:08 schrieb Arvind Yadav:
> Here's enabling software signaling on fence because
> amdgpu_ctx_add_fence() is checking the status of fence
> and emits warning.
>
> Signed-off-by: Arvind Yadav <[email protected]>
> ---
>
> Changes in v1, v2: This new patch was not part of previous series.
>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index afe22f83d4a6..21221d705588 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -730,6 +730,8 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
>
> dma_fence_get(fence);
>
> + dma_fence_enable_sw_signaling(fence);
> +

That looks like a step into the right direction, but still isn't correct.

The code using this interface should call amdgpu_ctx_wait_prev_fence()
before calling amdgpu_ctx_add_fence(). And amdgpu_ctx_wait_prev_fence()
in turn calls dma_fence_wait() which should also enables the signaling.

So when we need this here something is still very wrong on the logic :)

Thanks,
Christian.

> spin_lock(&ctx->ring_lock);
> centity->fences[idx] = fence;
> centity->sequence++;