Subject: [PATCH v1 2/2] drm: Clear the fence pointer when writeback job signaled

When signaling the completion of a writeback job, clear the out_fence
pointer after releasing it. While at it, move the fence handling out of
the job_lock critical section.

If a fence is left over in drm_writeback_cleanup_job(), release it.

Signed-off-by: Lowry Li (Arm Technology China) <[email protected]>
---
drivers/gpu/drm/drm_writeback.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
index ff138b6..43d9e3b 100644
--- a/drivers/gpu/drm/drm_writeback.c
+++ b/drivers/gpu/drm/drm_writeback.c
@@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job)
if (job->fb)
drm_framebuffer_put(job->fb);

+ if (job->out_fence)
+ dma_fence_put(job->out_fence);
+
kfree(job);
}
EXPORT_SYMBOL(drm_writeback_cleanup_job);
@@ -366,25 +369,29 @@ static void cleanup_work(struct work_struct *work)
{
unsigned long flags;
struct drm_writeback_job *job;
+ struct dma_fence *out_fence;

spin_lock_irqsave(&wb_connector->job_lock, flags);
job = list_first_entry_or_null(&wb_connector->job_queue,
struct drm_writeback_job,
list_entry);
- if (job) {
+ if (job)
list_del(&job->list_entry);
- if (job->out_fence) {
- if (status)
- dma_fence_set_error(job->out_fence, status);
- dma_fence_signal(job->out_fence);
- dma_fence_put(job->out_fence);
- }
- }
+
spin_unlock_irqrestore(&wb_connector->job_lock, flags);

if (WARN_ON(!job))
return;

+ out_fence = job->out_fence;
+ if (out_fence) {
+ if (status)
+ dma_fence_set_error(out_fence, status);
+ dma_fence_signal(out_fence);
+ dma_fence_put(out_fence);
+ job->out_fence = NULL;
+ }
+
INIT_WORK(&job->cleanup_work, cleanup_work);
queue_work(system_long_wq, &job->cleanup_work);
}
--
1.9.1


2019-07-31 13:16:27

by Liviu Dudau

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] drm: Clear the fence pointer when writeback job signaled

Hi Lowry,

On Wed, Jul 31, 2019 at 11:04:45AM +0000, Lowry Li (Arm Technology China) wrote:
> During it signals the completion of a writeback job, after releasing
> the out_fence, we'd clear the pointer.
>
> Check if fence left over in drm_writeback_cleanup_job(), release it.
>
> Signed-off-by: Lowry Li (Arm Technology China) <[email protected]>
> ---
> drivers/gpu/drm/drm_writeback.c | 23 +++++++++++++++--------
> 1 file changed, 15 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
> index ff138b6..43d9e3b 100644
> --- a/drivers/gpu/drm/drm_writeback.c
> +++ b/drivers/gpu/drm/drm_writeback.c
> @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job)
> if (job->fb)
> drm_framebuffer_put(job->fb);
>
> + if (job->out_fence)
> + dma_fence_put(job->out_fence);
> +
> kfree(job);
> }

This change looks good.

> EXPORT_SYMBOL(drm_writeback_cleanup_job);
> @@ -366,25 +369,29 @@ static void cleanup_work(struct work_struct *work)
> {
> unsigned long flags;
> struct drm_writeback_job *job;
> + struct dma_fence *out_fence;
>
> spin_lock_irqsave(&wb_connector->job_lock, flags);
> job = list_first_entry_or_null(&wb_connector->job_queue,
> struct drm_writeback_job,
> list_entry);
> - if (job) {
> + if (job)
> list_del(&job->list_entry);
> - if (job->out_fence) {
> - if (status)
> - dma_fence_set_error(job->out_fence, status);
> - dma_fence_signal(job->out_fence);
> - dma_fence_put(job->out_fence);

*Here*

> - }
> - }
> +
> spin_unlock_irqrestore(&wb_connector->job_lock, flags);
>
> if (WARN_ON(!job))
> return;
>
> + out_fence = job->out_fence;
> + if (out_fence) {
> + if (status)
> + dma_fence_set_error(out_fence, status);
> + dma_fence_signal(out_fence);
> + dma_fence_put(out_fence);
> + job->out_fence = NULL;
> + }
> +

I don't get the point of this change. Why not just add job->out_fence = NULL
where *Here* is?

Best regards,
Liviu

> INIT_WORK(&job->cleanup_work, cleanup_work);
> queue_work(system_long_wq, &job->cleanup_work);
> }
> --
> 1.9.1
>

--
====================
| I would like to |
| fix the world, |
| but they're not |
| giving me the |
\ source code! /
---------------
¯\_(ツ)_/¯

2019-07-31 14:53:42

by Brian Starkey

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] drm: Clear the fence pointer when writeback job signaled

Hi Lowry,

Thanks for this cleanup.

On Wed, Jul 31, 2019 at 11:04:45AM +0000, Lowry Li (Arm Technology China) wrote:
> During it signals the completion of a writeback job, after releasing
> the out_fence, we'd clear the pointer.
>
> Check if fence left over in drm_writeback_cleanup_job(), release it.
>
> Signed-off-by: Lowry Li (Arm Technology China) <[email protected]>
> ---
> drivers/gpu/drm/drm_writeback.c | 23 +++++++++++++++--------
> 1 file changed, 15 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
> index ff138b6..43d9e3b 100644
> --- a/drivers/gpu/drm/drm_writeback.c
> +++ b/drivers/gpu/drm/drm_writeback.c
> @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job)
> if (job->fb)
> drm_framebuffer_put(job->fb);
>
> + if (job->out_fence)

I'm thinking it might be a good idea to signal the fence with an error
here, if it's not already signaled. Otherwise, if there's someone
waiting (which there shouldn't be), they're going to be waiting a very
long time :-)

Thanks,
-Brian

> + dma_fence_put(job->out_fence);
> +
> kfree(job);
> }

2019-08-01 10:03:55

by Liviu Dudau

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] drm: Clear the fence pointer when writeback job signaled

On Thu, Aug 01, 2019 at 06:31:13AM +0000, Lowry Li (Arm Technology China) wrote:
> Hi Liviu,
>
> On Wed, Jul 31, 2019 at 01:15:25PM +0000, Liviu Dudau wrote:
> > Hi Lowry,
> >
> > On Wed, Jul 31, 2019 at 11:04:45AM +0000, Lowry Li (Arm Technology China) wrote:
> > > During it signals the completion of a writeback job, after releasing
> > > the out_fence, we'd clear the pointer.
> > >
> > > Check if fence left over in drm_writeback_cleanup_job(), release it.
> > >
> > > Signed-off-by: Lowry Li (Arm Technology China) <[email protected]>
> > > ---
> > > drivers/gpu/drm/drm_writeback.c | 23 +++++++++++++++--------
> > > 1 file changed, 15 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
> > > index ff138b6..43d9e3b 100644
> > > --- a/drivers/gpu/drm/drm_writeback.c
> > > +++ b/drivers/gpu/drm/drm_writeback.c
> > > @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job)
> > > if (job->fb)
> > > drm_framebuffer_put(job->fb);
> > >
> > > + if (job->out_fence)
> > > + dma_fence_put(job->out_fence);
> > > +
> > > kfree(job);
> > > }
> >
> > This change looks good.
> >
> > > EXPORT_SYMBOL(drm_writeback_cleanup_job);
> > > @@ -366,25 +369,29 @@ static void cleanup_work(struct work_struct *work)
> > > {
> > > unsigned long flags;
> > > struct drm_writeback_job *job;
> > > + struct dma_fence *out_fence;
> > >
> > > spin_lock_irqsave(&wb_connector->job_lock, flags);
> > > job = list_first_entry_or_null(&wb_connector->job_queue,
> > > struct drm_writeback_job,
> > > list_entry);
> > > - if (job) {
> > > + if (job)
> > > list_del(&job->list_entry);
> > > - if (job->out_fence) {
> > > - if (status)
> > > - dma_fence_set_error(job->out_fence, status);
> > > - dma_fence_signal(job->out_fence);
> > > - dma_fence_put(job->out_fence);
> >
> > *Here*
> >
> > > - }
> > > - }
> > > +
> > > spin_unlock_irqrestore(&wb_connector->job_lock, flags);
> > >
> > > if (WARN_ON(!job))
> > > return;
> > >
> > > + out_fence = job->out_fence;
> > > + if (out_fence) {
> > > + if (status)
> > > + dma_fence_set_error(out_fence, status);
> > > + dma_fence_signal(out_fence);
> > > + dma_fence_put(out_fence);
> > > + job->out_fence = NULL;
> > > + }
> > > +
> >
> > I don't get the point of this change. Why not just add job->out_fence = NULL
> > where *Here* is?
> >
> > Best regards,
> > Liviu
> Besides setting the pointer to NULL, I also refined the code by moving the
> fence operations out of the locked section.

OK, now it makes sense. May I suggest you add that to the commit message?

Otherwise, Acked-by: Liviu Dudau <[email protected]>

Best regards,
Liviu

>
> Best regards,
> Lowry
> > > INIT_WORK(&job->cleanup_work, cleanup_work);
> > > queue_work(system_long_wq, &job->cleanup_work);
> > > }
> > > --
> > > 1.9.1
> > >
> >
> > --
> > ====================
> > | I would like to |
> > | fix the world, |
> > | but they're not |
> > | giving me the |
> > \ source code! /
> > ---------------
> > ¯\_(ツ)_/¯
>
> --
> Regards,
> Lowry

--
====================
| I would like to |
| fix the world, |
| but they're not |
| giving me the |
\ source code! /
---------------
¯\_(ツ)_/¯

2019-08-02 09:48:45

by Daniel Vetter

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] drm: Clear the fence pointer when writeback job signaled

On Fri, Aug 2, 2019 at 11:43 AM Daniel Vetter <[email protected]> wrote:
>
> On Fri, Aug 2, 2019 at 11:29 AM Brian Starkey <[email protected]> wrote:
> >
> > Hi Lowry,
> >
> > On Thu, Aug 01, 2019 at 06:34:08AM +0000, Lowry Li (Arm Technology China) wrote:
> > > Hi Brian,
> > >
> > > On Wed, Jul 31, 2019 at 09:20:04PM +0800, Brian Starkey wrote:
> > > > Hi Lowry,
> > > >
> > > > Thanks for this cleanup.
> > > >
> > > > On Wed, Jul 31, 2019 at 11:04:45AM +0000, Lowry Li (Arm Technology China) wrote:
> > > > > During it signals the completion of a writeback job, after releasing
> > > > > the out_fence, we'd clear the pointer.
> > > > >
> > > > > Check if fence left over in drm_writeback_cleanup_job(), release it.
> > > > >
> > > > > Signed-off-by: Lowry Li (Arm Technology China) <[email protected]>
> > > > > ---
> > > > > drivers/gpu/drm/drm_writeback.c | 23 +++++++++++++++--------
> > > > > 1 file changed, 15 insertions(+), 8 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
> > > > > index ff138b6..43d9e3b 100644
> > > > > --- a/drivers/gpu/drm/drm_writeback.c
> > > > > +++ b/drivers/gpu/drm/drm_writeback.c
> > > > > @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job)
> > > > > if (job->fb)
> > > > > drm_framebuffer_put(job->fb);
> > > > >
> > > > > + if (job->out_fence)
> > > >
> > > > I'm thinking it might be a good idea to signal the fence with an error
> > > > here, if it's not already signaled. Otherwise, if there's someone
> > > > waiting (which there shouldn't be), they're going to be waiting a very
> > > > long time :-)
> > > >
> > > > Thanks,
> > > > -Brian
> > > >
> > > Here it happened at atomic_check failed and test only commit. For both
> > > cases, the commit has been dropped and it's only a clean up. So here better
> > > not be treated as an error case:)
> >
> > If anyone else has a reference on the fence, then IMO it absolutely is
> > an error to reach this point without the fence being signaled -
> > because it means that the fence will never be signaled.
> >
> > I don't think the API gives you a way to check if this is the last
> > reference, so it's safest to just make sure the fence is signalled
> > before dropping the reference.
> >
> > It just feels wrong to me to have the possibility of a dangling fence
> > which is never going to get signalled; and it's an easy defensive step
> > to make sure it can never happen.
> >
> > I know it _shouldn't_ happen, but we often put in handling for cases
> > which shouldn't happen, because they frequently do happen :-)
>
> We're not as paranoid with the vblank fences either, so not sure why
> we need to be this paranoid with writeback fences. If your driver
> grabs anything from the atomic state in ->atomic_check it's buggy
> anyway.
>
> If you want to fix this properly I think we need to move the call to
> prepare_signalling() in between atomic_check and atomic_commit. Then I
> think it makes sense to also force-complete the fence on error ...
>
> > > Since for userspace, it should have been failed or a test only case, so
> > > writebace fence should not be signaled.
> >
> > It's not only userspace that can wait on fences (and in fact this
> > fence will never even reach userspace if the commit fails), the driver
> > may have taken a copy to use for "something".

I forgot to add: you can check this by looking at the fence reference
count. A WARN_ON if that's more than 1 on cleanup (but also for the
out fences) could be a nice addition.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

2019-08-02 10:03:17

by Daniel Vetter

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] drm: Clear the fence pointer when writeback job signaled

On Fri, Aug 2, 2019 at 11:29 AM Brian Starkey <[email protected]> wrote:
>
> Hi Lowry,
>
> On Thu, Aug 01, 2019 at 06:34:08AM +0000, Lowry Li (Arm Technology China) wrote:
> > Hi Brian,
> >
> > On Wed, Jul 31, 2019 at 09:20:04PM +0800, Brian Starkey wrote:
> > > Hi Lowry,
> > >
> > > Thanks for this cleanup.
> > >
> > > On Wed, Jul 31, 2019 at 11:04:45AM +0000, Lowry Li (Arm Technology China) wrote:
> > > > During it signals the completion of a writeback job, after releasing
> > > > the out_fence, we'd clear the pointer.
> > > >
> > > > Check if fence left over in drm_writeback_cleanup_job(), release it.
> > > >
> > > > Signed-off-by: Lowry Li (Arm Technology China) <[email protected]>
> > > > ---
> > > > drivers/gpu/drm/drm_writeback.c | 23 +++++++++++++++--------
> > > > 1 file changed, 15 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
> > > > index ff138b6..43d9e3b 100644
> > > > --- a/drivers/gpu/drm/drm_writeback.c
> > > > +++ b/drivers/gpu/drm/drm_writeback.c
> > > > @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job)
> > > > if (job->fb)
> > > > drm_framebuffer_put(job->fb);
> > > >
> > > > + if (job->out_fence)
> > >
> > > I'm thinking it might be a good idea to signal the fence with an error
> > > here, if it's not already signaled. Otherwise, if there's someone
> > > waiting (which there shouldn't be), they're going to be waiting a very
> > > long time :-)
> > >
> > > Thanks,
> > > -Brian
> > >
> > Here it happened at atomic_check failed and test only commit. For both
> > cases, the commit has been dropped and it's only a clean up. So here better
> > not be treated as an error case:)
>
> If anyone else has a reference on the fence, then IMO it absolutely is
> an error to reach this point without the fence being signaled -
> because it means that the fence will never be signaled.
>
> I don't think the API gives you a way to check if this is the last
> reference, so it's safest to just make sure the fence is signalled
> before dropping the reference.
>
> It just feels wrong to me to have the possibility of a dangling fence
> which is never going to get signalled; and it's an easy defensive step
> to make sure it can never happen.
>
> I know it _shouldn't_ happen, but we often put in handling for cases
> which shouldn't happen, because they frequently do happen :-)

We're not as paranoid with the vblank fences either, so not sure why
we need to be this paranoid with writeback fences. If your driver
grabs anything from the atomic state in ->atomic_check it's buggy
anyway.

If you want to fix this properly I think we need to move the call to
prepare_signalling() in between atomic_check and atomic_commit. Then I
think it makes sense to also force-complete the fence on error ...
-Daniel

> > Since for userspace, it should have been failed or a test only case, so
> > writebace fence should not be signaled.
>
> It's not only userspace that can wait on fences (and in fact this
> fence will never even reach userspace if the commit fails), the driver
> may have taken a copy to use for "something".
>
> Cheers,
> -Brian
>
> >
> > Best regards,
> > Lowry
> > > > + dma_fence_put(job->out_fence);
> > > > +
> > > > kfree(job);
> > > > }
> >
> > --
> > Regards,
> > Lowry



--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

2019-08-02 10:10:56

by Brian Starkey

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] drm: Clear the fence pointer when writeback job signaled

Hi Daniel,

On Fri, Aug 02, 2019 at 11:45:13AM +0200, Daniel Vetter wrote:
> On Fri, Aug 2, 2019 at 11:43 AM Daniel Vetter <[email protected]> wrote:
> >
> > On Fri, Aug 2, 2019 at 11:29 AM Brian Starkey <[email protected]> wrote:
> > >
> > > Hi Lowry,
> > >
> > > On Thu, Aug 01, 2019 at 06:34:08AM +0000, Lowry Li (Arm Technology China) wrote:
> > > > Hi Brian,
> > > >
> > > > On Wed, Jul 31, 2019 at 09:20:04PM +0800, Brian Starkey wrote:
> > > > > Hi Lowry,
> > > > >
> > > > > Thanks for this cleanup.
> > > > >
> > > > > On Wed, Jul 31, 2019 at 11:04:45AM +0000, Lowry Li (Arm Technology China) wrote:
> > > > > > During it signals the completion of a writeback job, after releasing
> > > > > > the out_fence, we'd clear the pointer.
> > > > > >
> > > > > > Check if fence left over in drm_writeback_cleanup_job(), release it.
> > > > > >
> > > > > > Signed-off-by: Lowry Li (Arm Technology China) <[email protected]>
> > > > > > ---
> > > > > > drivers/gpu/drm/drm_writeback.c | 23 +++++++++++++++--------
> > > > > > 1 file changed, 15 insertions(+), 8 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
> > > > > > index ff138b6..43d9e3b 100644
> > > > > > --- a/drivers/gpu/drm/drm_writeback.c
> > > > > > +++ b/drivers/gpu/drm/drm_writeback.c
> > > > > > @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job)
> > > > > > if (job->fb)
> > > > > > drm_framebuffer_put(job->fb);
> > > > > >
> > > > > > + if (job->out_fence)
> > > > >
> > > > > I'm thinking it might be a good idea to signal the fence with an error
> > > > > here, if it's not already signaled. Otherwise, if there's someone
> > > > > waiting (which there shouldn't be), they're going to be waiting a very
> > > > > long time :-)
> > > > >
> > > > > Thanks,
> > > > > -Brian
> > > > >
> > > > Here it happened at atomic_check failed and test only commit. For both
> > > > cases, the commit has been dropped and it's only a clean up. So here better
> > > > not be treated as an error case:)
> > >
> > > If anyone else has a reference on the fence, then IMO it absolutely is
> > > an error to reach this point without the fence being signaled -
> > > because it means that the fence will never be signaled.
> > >
> > > I don't think the API gives you a way to check if this is the last
> > > reference, so it's safest to just make sure the fence is signalled
> > > before dropping the reference.
> > >
> > > It just feels wrong to me to have the possibility of a dangling fence
> > > which is never going to get signalled; and it's an easy defensive step
> > > to make sure it can never happen.
> > >
> > > I know it _shouldn't_ happen, but we often put in handling for cases
> > > which shouldn't happen, because they frequently do happen :-)
> >
> > We're not as paranoid with the vblank fences either, so not sure why
> > we need to be this paranoid with writeback fences. If your driver
> > grabs anything from the atomic state in ->atomic_check it's buggy
> > anyway.
> >
> > If you want to fix this properly I think we need to move the call to
> > prepare_signalling() in between atomic_check and atomic_commit. Then I
> > think it makes sense to also force-complete the fence on error ...

Well, fair enough. I'm struggling with "that's too paranoid" vs "fix
it properly" though? Is it a "problem" worth fixing or not?

It seems natural to me to do the fence cleanup in the cleanup function
for the object which owns the fence.

> >
> > > > Since for userspace, it should have been failed or a test only case, so
> > > > writebace fence should not be signaled.
> > >
> > > It's not only userspace that can wait on fences (and in fact this
> > > fence will never even reach userspace if the commit fails), the driver
> > > may have taken a copy to use for "something".
>
> I forgot to add: you can check this by looking at the fence reference
> count. A WARN_ON if that's more than 1 on cleanup (but also for the
> out fences) could be a nice addition.

Do we really want to be looking at the fence internals directly like
that?

Cheers,
-Brian

> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch

2019-08-02 13:24:10

by Brian Starkey

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] drm: Clear the fence pointer when writeback job signaled

Hi Lowry,

On Thu, Aug 01, 2019 at 06:34:08AM +0000, Lowry Li (Arm Technology China) wrote:
> Hi Brian,
>
> On Wed, Jul 31, 2019 at 09:20:04PM +0800, Brian Starkey wrote:
> > Hi Lowry,
> >
> > Thanks for this cleanup.
> >
> > On Wed, Jul 31, 2019 at 11:04:45AM +0000, Lowry Li (Arm Technology China) wrote:
> > > During it signals the completion of a writeback job, after releasing
> > > the out_fence, we'd clear the pointer.
> > >
> > > Check if fence left over in drm_writeback_cleanup_job(), release it.
> > >
> > > Signed-off-by: Lowry Li (Arm Technology China) <[email protected]>
> > > ---
> > > drivers/gpu/drm/drm_writeback.c | 23 +++++++++++++++--------
> > > 1 file changed, 15 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
> > > index ff138b6..43d9e3b 100644
> > > --- a/drivers/gpu/drm/drm_writeback.c
> > > +++ b/drivers/gpu/drm/drm_writeback.c
> > > @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job)
> > > if (job->fb)
> > > drm_framebuffer_put(job->fb);
> > >
> > > + if (job->out_fence)
> >
> > I'm thinking it might be a good idea to signal the fence with an error
> > here, if it's not already signaled. Otherwise, if there's someone
> > waiting (which there shouldn't be), they're going to be waiting a very
> > long time :-)
> >
> > Thanks,
> > -Brian
> >
> This happens when atomic_check fails and for test-only commits. In both
> cases the commit has been dropped and this is only a cleanup, so it is
> better not to treat it as an error case :)

If anyone else has a reference on the fence, then IMO it absolutely is
an error to reach this point without the fence being signaled -
because it means that the fence will never be signaled.

I don't think the API gives you a way to check if this is the last
reference, so it's safest to just make sure the fence is signalled
before dropping the reference.

It just feels wrong to me to have the possibility of a dangling fence
which is never going to get signalled; and it's an easy defensive step
to make sure it can never happen.

I know it _shouldn't_ happen, but we often put in handling for cases
which shouldn't happen, because they frequently do happen :-)

>
> From userspace's point of view, the commit has either failed or was
> test-only, so the writeback fence should not be signaled.

It's not only userspace that can wait on fences (and in fact this
fence will never even reach userspace if the commit fails), the driver
may have taken a copy to use for "something".

Cheers,
-Brian

>
> Best regards,
> Lowry
> > > + dma_fence_put(job->out_fence);
> > > +
> > > kfree(job);
> > > }
>
> --
> Regards,
> Lowry

2019-08-02 13:37:39

by James Qian Wang

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] drm: Clear the fence pointer when writeback job signaled

On Fri, Aug 02, 2019 at 05:29:20PM +0800, Brian Starkey wrote:
> Hi Lowry,
>
> On Thu, Aug 01, 2019 at 06:34:08AM +0000, Lowry Li (Arm Technology China) wrote:
> > Hi Brian,
> >
> > On Wed, Jul 31, 2019 at 09:20:04PM +0800, Brian Starkey wrote:
> > > Hi Lowry,
> > >
> > > Thanks for this cleanup.
> > >
> > > On Wed, Jul 31, 2019 at 11:04:45AM +0000, Lowry Li (Arm Technology China) wrote:
> > > > During it signals the completion of a writeback job, after releasing
> > > > the out_fence, we'd clear the pointer.
> > > >
> > > > Check if fence left over in drm_writeback_cleanup_job(), release it.
> > > >
> > > > Signed-off-by: Lowry Li (Arm Technology China) <[email protected]>
> > > > ---
> > > > drivers/gpu/drm/drm_writeback.c | 23 +++++++++++++++--------
> > > > 1 file changed, 15 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
> > > > index ff138b6..43d9e3b 100644
> > > > --- a/drivers/gpu/drm/drm_writeback.c
> > > > +++ b/drivers/gpu/drm/drm_writeback.c
> > > > @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job)
> > > > if (job->fb)
> > > > drm_framebuffer_put(job->fb);
> > > >
> > > > + if (job->out_fence)
> > >
> > > I'm thinking it might be a good idea to signal the fence with an error
> > > here, if it's not already signaled. Otherwise, if there's someone
> > > waiting (which there shouldn't be), they're going to be waiting a very
> > > long time :-)
> > >
> > > Thanks,
> > > -Brian
> > >
> > Here it happened at atomic_check failed and test only commit. For both
> > cases, the commit has been dropped and it's only a clean up. So here better
> > not be treated as an error case:)
>
> If anyone else has a reference on the fence, then IMO it absolutely is
> an error to reach this point without the fence being signaled -
> because it means that the fence will never be signaled.
>
> I don't think the API gives you a way to check if this is the last
> reference, so it's safest to just make sure the fence is signalled
> before dropping the reference.
>
> It just feels wrong to me to have the possibility of a dangling fence
> which is never going to get signalled; and it's an easy defensive step
> to make sure it can never happen.
>
> I know it _shouldn't_ happen, but we often put in handling for cases
> which shouldn't happen, because they frequently do happen :-)
>
> >
> > Since for userspace, it should have been failed or a test only case, so
> > writebace fence should not be signaled.
>
> It's not only userspace that can wait on fences (and in fact this
> fence will never even reach userspace if the commit fails), the driver
> may have taken a copy to use for "something".
>

Maybe we can add wb_fence cancellation to complete_signaling() for the
atomic_check-failure cleanup, as is done for crtc->out_fence.

Then, if we still find a fence at this point, it must be an error, so we
signal it and WARN.

Thanks
James

> Cheers,
> -Brian
>
> >
> > Best regards,
> > Lowry
> > > > + dma_fence_put(job->out_fence);
> > > > +
> > > > kfree(job);
> > > > }
> >
> > --
> > Regards
> > Lowry

2019-08-03 18:06:43

by Daniel Vetter

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] drm: Clear the fence pointer when writeback job signaled

On Fri, Aug 02, 2019 at 10:09:05AM +0000, Brian Starkey wrote:
> Hi Daniel,
>
> On Fri, Aug 02, 2019 at 11:45:13AM +0200, Daniel Vetter wrote:
> > On Fri, Aug 2, 2019 at 11:43 AM Daniel Vetter <[email protected]> wrote:
> > >
> > > On Fri, Aug 2, 2019 at 11:29 AM Brian Starkey <[email protected]> wrote:
> > > >
> > > > Hi Lowry,
> > > >
> > > > On Thu, Aug 01, 2019 at 06:34:08AM +0000, Lowry Li (Arm Technology China) wrote:
> > > > > Hi Brian,
> > > > >
> > > > > On Wed, Jul 31, 2019 at 09:20:04PM +0800, Brian Starkey wrote:
> > > > > > Hi Lowry,
> > > > > >
> > > > > > Thanks for this cleanup.
> > > > > >
> > > > > > On Wed, Jul 31, 2019 at 11:04:45AM +0000, Lowry Li (Arm Technology China) wrote:
> > > > > > > During it signals the completion of a writeback job, after releasing
> > > > > > > the out_fence, we'd clear the pointer.
> > > > > > >
> > > > > > > Check if fence left over in drm_writeback_cleanup_job(), release it.
> > > > > > >
> > > > > > > Signed-off-by: Lowry Li (Arm Technology China) <[email protected]>
> > > > > > > ---
> > > > > > > drivers/gpu/drm/drm_writeback.c | 23 +++++++++++++++--------
> > > > > > > 1 file changed, 15 insertions(+), 8 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
> > > > > > > index ff138b6..43d9e3b 100644
> > > > > > > --- a/drivers/gpu/drm/drm_writeback.c
> > > > > > > +++ b/drivers/gpu/drm/drm_writeback.c
> > > > > > > @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job)
> > > > > > > if (job->fb)
> > > > > > > drm_framebuffer_put(job->fb);
> > > > > > >
> > > > > > > + if (job->out_fence)
> > > > > >
> > > > > > I'm thinking it might be a good idea to signal the fence with an error
> > > > > > here, if it's not already signaled. Otherwise, if there's someone
> > > > > > waiting (which there shouldn't be), they're going to be waiting a very
> > > > > > long time :-)
> > > > > >
> > > > > > Thanks,
> > > > > > -Brian
> > > > > >
> > > > > Here it happened at atomic_check failed and test only commit. For both
> > > > > cases, the commit has been dropped and it's only a clean up. So here better
> > > > > not be treated as an error case:)
> > > >
> > > > If anyone else has a reference on the fence, then IMO it absolutely is
> > > > an error to reach this point without the fence being signaled -
> > > > because it means that the fence will never be signaled.
> > > >
> > > > I don't think the API gives you a way to check if this is the last
> > > > reference, so it's safest to just make sure the fence is signalled
> > > > before dropping the reference.
> > > >
> > > > It just feels wrong to me to have the possibility of a dangling fence
> > > > which is never going to get signalled; and it's an easy defensive step
> > > > to make sure it can never happen.
> > > >
> > > > I know it _shouldn't_ happen, but we often put in handling for cases
> > > > which shouldn't happen, because they frequently do happen :-)
> > >
> > > We're not as paranoid with the vblank fences either, so not sure why
> > > we need to be this paranoid with writeback fences. If your driver
> > > grabs anything from the atomic state in ->atomic_check it's buggy
> > > anyway.
> > >
> > > If you want to fix this properly I think we need to move the call to
> > > prepare_signalling() in between atomic_check and atomic_commit. Then I
> > > think it makes sense to also force-complete the fence on error ...
>
> Well, fair enough. I'm struggling with "that's too paranoid" vs "fix
> it properly" though? Is it a "problem" worth fixing or not?

Up to you to decide that.

> It seems natural to me to do the fence cleanup in the cleanup function
> for the object which owns the fence.
>
> > >
> > > > > Since for userspace, it should have been failed or a test only case, so
> > > > > writebace fence should not be signaled.
> > > >
> > > > It's not only userspace that can wait on fences (and in fact this
> > > > fence will never even reach userspace if the commit fails), the driver
> > > > may have taken a copy to use for "something".
> >
> > I forgot to add: you can check this by looking at the fence reference
> > count. A WARN_ON if that's more than 1 on cleanup (but also for the
> > out fences) could be a nice addition.
>
> Do we really want to be looking at the fence internals directly like
> that?

Wrap it up in a helper like dma_fence_release_private or whatever, which
combines the check and (hopefully final) _put(). Might need a better name.
-Daniel

>
> Cheers,
> -Brian
>
> > -Daniel
> > --
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > +41 (0) 79 365 57 48 - http://blog.ffwll.ch

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

2019-08-05 13:11:53

by Brian Starkey

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] drm: Clear the fence pointer when writeback job signaled

Hi Lowry,

Based on Daniel's input, this patch looks fine:

Reviewed-by: Brian Starkey <[email protected]>

I think there's some opportunity for improvement around
prepare_signaling/complete_signaling, but that can be treated as
separate from fixing this bug.

Thanks,
-Brian

On Wed, Jul 31, 2019 at 11:04:45AM +0000, Lowry Li (Arm Technology China) wrote:
> During it signals the completion of a writeback job, after releasing
> the out_fence, we'd clear the pointer.
>
> Check if fence left over in drm_writeback_cleanup_job(), release it.
>
> Signed-off-by: Lowry Li (Arm Technology China) <[email protected]>
> ---
> drivers/gpu/drm/drm_writeback.c | 23 +++++++++++++++--------
> 1 file changed, 15 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
> index ff138b6..43d9e3b 100644
> --- a/drivers/gpu/drm/drm_writeback.c
> +++ b/drivers/gpu/drm/drm_writeback.c
> @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job)
> if (job->fb)
> drm_framebuffer_put(job->fb);
>
> + if (job->out_fence)
> + dma_fence_put(job->out_fence);
> +
> kfree(job);
> }
> EXPORT_SYMBOL(drm_writeback_cleanup_job);
> @@ -366,25 +369,29 @@ static void cleanup_work(struct work_struct *work)
> {
> unsigned long flags;
> struct drm_writeback_job *job;
> + struct dma_fence *out_fence;
>
> spin_lock_irqsave(&wb_connector->job_lock, flags);
> job = list_first_entry_or_null(&wb_connector->job_queue,
> struct drm_writeback_job,
> list_entry);
> - if (job) {
> + if (job)
> list_del(&job->list_entry);
> - if (job->out_fence) {
> - if (status)
> - dma_fence_set_error(job->out_fence, status);
> - dma_fence_signal(job->out_fence);
> - dma_fence_put(job->out_fence);
> - }
> - }
> +
> spin_unlock_irqrestore(&wb_connector->job_lock, flags);
>
> if (WARN_ON(!job))
> return;
>
> + out_fence = job->out_fence;
> + if (out_fence) {
> + if (status)
> + dma_fence_set_error(out_fence, status);
> + dma_fence_signal(out_fence);
> + dma_fence_put(out_fence);
> + job->out_fence = NULL;
> + }
> +
> INIT_WORK(&job->cleanup_work, cleanup_work);
> queue_work(system_long_wq, &job->cleanup_work);
> }
> --
> 1.9.1
>

2019-09-24 16:50:29

by James Qian Wang

[permalink] [raw]
Subject: Re: [v1,2/2] drm: Clear the fence pointer when writeback job signaled

On Wed, Jul 31, 2019 at 11:04:45AM +0000, Lowry Li (Arm Technology China) wrote:
> During it signals the completion of a writeback job, after releasing
> the out_fence, we'd clear the pointer.
>
> Check if fence left over in drm_writeback_cleanup_job(), release it.
>
> Signed-off-by: Lowry Li (Arm Technology China) <[email protected]>
> Reviewed-by: Brian Starkey <[email protected]>

Looks good to me.

Reviewed-by: James Qian Wang (Arm Technology China) <[email protected]>

will push it to drm-misc-fixes

James

> ---
> drivers/gpu/drm/drm_writeback.c | 23 +++++++++++++++--------
> 1 file changed, 15 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
> index ff138b6..43d9e3b 100644
> --- a/drivers/gpu/drm/drm_writeback.c
> +++ b/drivers/gpu/drm/drm_writeback.c
> @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job)
> if (job->fb)
> drm_framebuffer_put(job->fb);
>
> + if (job->out_fence)
> + dma_fence_put(job->out_fence);
> +
> kfree(job);
> }
> EXPORT_SYMBOL(drm_writeback_cleanup_job);
> @@ -366,25 +369,29 @@ static void cleanup_work(struct work_struct *work)
> {
> unsigned long flags;
> struct drm_writeback_job *job;
> + struct dma_fence *out_fence;
>
> spin_lock_irqsave(&wb_connector->job_lock, flags);
> job = list_first_entry_or_null(&wb_connector->job_queue,
> struct drm_writeback_job,
> list_entry);
> - if (job) {
> + if (job)
> list_del(&job->list_entry);
> - if (job->out_fence) {
> - if (status)
> - dma_fence_set_error(job->out_fence, status);
> - dma_fence_signal(job->out_fence);
> - dma_fence_put(job->out_fence);
> - }
> - }
> +
> spin_unlock_irqrestore(&wb_connector->job_lock, flags);
>
> if (WARN_ON(!job))
> return;
>
> + out_fence = job->out_fence;
> + if (out_fence) {
> + if (status)
> + dma_fence_set_error(out_fence, status);
> + dma_fence_signal(out_fence);
> + dma_fence_put(out_fence);
> + job->out_fence = NULL;
> + }
> +
> INIT_WORK(&job->cleanup_work, cleanup_work);
> queue_work(system_long_wq, &job->cleanup_work);
> }