From: Chris Wilson <[email protected]>
There may be a disconnect between the GT used by the engine and the GT
used for the VM, requiring us to hold a wakeref on both while the GPU is
active with this request.
v2: added explanation to __queue_and_release_pm
Signed-off-by: Chris Wilson <[email protected]>
[ahajda: removed not-yet-upstremed wakeref tracking bits]
Signed-off-by: Andrzej Hajda <[email protected]>
---
Changes in v2:
- Link to v1: https://lore.kernel.org/r/[email protected]
---
drivers/gpu/drm/i915/gt/intel_context.h | 15 +++++++++++----
drivers/gpu/drm/i915/gt/intel_engine_pm.c | 9 +++++++++
2 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 0a8d553da3f439..48f888c3da083b 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -14,6 +14,7 @@
#include "i915_drv.h"
#include "intel_context_types.h"
#include "intel_engine_types.h"
+#include "intel_gt_pm.h"
#include "intel_ring_types.h"
#include "intel_timeline_types.h"
#include "i915_trace.h"
@@ -207,8 +208,11 @@ void intel_context_exit_engine(struct intel_context *ce);
static inline void intel_context_enter(struct intel_context *ce)
{
lockdep_assert_held(&ce->timeline->mutex);
- if (!ce->active_count++)
- ce->ops->enter(ce);
+ if (ce->active_count++)
+ return;
+
+ ce->ops->enter(ce);
+ intel_gt_pm_get(ce->vm->gt);
}
static inline void intel_context_mark_active(struct intel_context *ce)
@@ -222,8 +226,11 @@ static inline void intel_context_exit(struct intel_context *ce)
{
lockdep_assert_held(&ce->timeline->mutex);
GEM_BUG_ON(!ce->active_count);
- if (!--ce->active_count)
- ce->ops->exit(ce);
+ if (--ce->active_count)
+ return;
+
+ intel_gt_pm_put_async(ce->vm->gt);
+ ce->ops->exit(ce);
}
static inline struct intel_context *intel_context_get(struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index e971b153fda976..ee531a5c142c77 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -114,6 +114,15 @@ __queue_and_release_pm(struct i915_request *rq,
ENGINE_TRACE(engine, "parking\n");
+ /*
+ * Open coded one half of intel_context_enter, which we have to omit
+ * here (see the large comment below) and because the other part must
+ * not be called due constructing directly with __i915_request_create
+ * which increments active count via intel_context_mark_active.
+ */
+ GEM_BUG_ON(rq->context->active_count != 1);
+ __intel_gt_pm_get(engine->gt);
+
/*
* We have to serialise all potential retirement paths with our
* submission, as we don't want to underflow either the
---
base-commit: 3385d6482cd60f2a0bbb0fa97b70ae7dbba4f95c
change-id: 20230330-hold_wakeref_for_active_vm-7f013a449ef3
Best regards,
--
Andrzej Hajda <[email protected]>
On 31/03/2023 15:16, Andrzej Hajda wrote:
> From: Chris Wilson <[email protected]>
>
> There may be a disconnect between the GT used by the engine and the GT
> used for the VM, requiring us to hold a wakeref on both while the GPU is
> active with this request.
>
> v2: added explanation to __queue_and_release_pm
>
> Signed-off-by: Chris Wilson <[email protected]>
> [ahajda: removed not-yet-upstremed wakeref tracking bits]
> Signed-off-by: Andrzej Hajda <[email protected]>
> ---
> Changes in v2:
> - Link to v1: https://lore.kernel.org/r/[email protected]
> ---
> drivers/gpu/drm/i915/gt/intel_context.h | 15 +++++++++++----
> drivers/gpu/drm/i915/gt/intel_engine_pm.c | 9 +++++++++
> 2 files changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index 0a8d553da3f439..48f888c3da083b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -14,6 +14,7 @@
> #include "i915_drv.h"
> #include "intel_context_types.h"
> #include "intel_engine_types.h"
> +#include "intel_gt_pm.h"
> #include "intel_ring_types.h"
> #include "intel_timeline_types.h"
> #include "i915_trace.h"
> @@ -207,8 +208,11 @@ void intel_context_exit_engine(struct intel_context *ce);
> static inline void intel_context_enter(struct intel_context *ce)
> {
> lockdep_assert_held(&ce->timeline->mutex);
> - if (!ce->active_count++)
> - ce->ops->enter(ce);
> + if (ce->active_count++)
> + return;
> +
> + ce->ops->enter(ce);
> + intel_gt_pm_get(ce->vm->gt);
> }
>
> static inline void intel_context_mark_active(struct intel_context *ce)
> @@ -222,8 +226,11 @@ static inline void intel_context_exit(struct intel_context *ce)
> {
> lockdep_assert_held(&ce->timeline->mutex);
> GEM_BUG_ON(!ce->active_count);
> - if (!--ce->active_count)
> - ce->ops->exit(ce);
> + if (--ce->active_count)
> + return;
> +
> + intel_gt_pm_put_async(ce->vm->gt);
> + ce->ops->exit(ce);
> }
>
> static inline struct intel_context *intel_context_get(struct intel_context *ce)
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index e971b153fda976..ee531a5c142c77 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -114,6 +114,15 @@ __queue_and_release_pm(struct i915_request *rq,
>
> ENGINE_TRACE(engine, "parking\n");
>
> + /*
> + * Open coded one half of intel_context_enter, which we have to omit
> + * here (see the large comment below) and because the other part must
> + * not be called due constructing directly with __i915_request_create
> + * which increments active count via intel_context_mark_active.
> + */
> + GEM_BUG_ON(rq->context->active_count != 1);
> + __intel_gt_pm_get(engine->gt);
> +
> /*
> * We have to serialise all potential retirement paths with our
> * submission, as we don't want to underflow either the
Reviewed-by: Tvrtko Ursulin <[email protected]>
Regards,
Tvrtko
Hi Andrzej,
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index 0a8d553da3f439..48f888c3da083b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -14,6 +14,7 @@
> #include "i915_drv.h"
> #include "intel_context_types.h"
> #include "intel_engine_types.h"
> +#include "intel_gt_pm.h"
> #include "intel_ring_types.h"
> #include "intel_timeline_types.h"
> #include "i915_trace.h"
> @@ -207,8 +208,11 @@ void intel_context_exit_engine(struct intel_context *ce);
> static inline void intel_context_enter(struct intel_context *ce)
> {
> lockdep_assert_held(&ce->timeline->mutex);
> - if (!ce->active_count++)
> - ce->ops->enter(ce);
> + if (ce->active_count++)
> + return;
> +
> + ce->ops->enter(ce);
> + intel_gt_pm_get(ce->vm->gt);
> }
>
> static inline void intel_context_mark_active(struct intel_context *ce)
> @@ -222,8 +226,11 @@ static inline void intel_context_exit(struct intel_context *ce)
> {
> lockdep_assert_held(&ce->timeline->mutex);
> GEM_BUG_ON(!ce->active_count);
> - if (!--ce->active_count)
> - ce->ops->exit(ce);
> + if (--ce->active_count)
> + return;
> +
> + intel_gt_pm_put_async(ce->vm->gt);
> + ce->ops->exit(ce);
shouldn't these two be swapped?
> }
>
> static inline struct intel_context *intel_context_get(struct intel_context *ce)
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index e971b153fda976..ee531a5c142c77 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -114,6 +114,15 @@ __queue_and_release_pm(struct i915_request *rq,
>
> ENGINE_TRACE(engine, "parking\n");
>
> + /*
> + * Open coded one half of intel_context_enter, which we have to omit
> + * here (see the large comment below) and because the other part must
> + * not be called due constructing directly with __i915_request_create
> + * which increments active count via intel_context_mark_active.
> + */
> + GEM_BUG_ON(rq->context->active_count != 1);
> + __intel_gt_pm_get(engine->gt);
where is it's brother "put"?
Thanks,
Andi
> +
> /*
> * We have to serialise all potential retirement paths with our
> * submission, as we don't want to underflow either the
>
> ---
> base-commit: 3385d6482cd60f2a0bbb0fa97b70ae7dbba4f95c
> change-id: 20230330-hold_wakeref_for_active_vm-7f013a449ef3
>
> Best regards,
> --
> Andrzej Hajda <[email protected]>
Hi Tvrtko,
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> > > index 0a8d553da3f439..48f888c3da083b 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_context.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> > > @@ -14,6 +14,7 @@
> > > #include "i915_drv.h"
> > > #include "intel_context_types.h"
> > > #include "intel_engine_types.h"
> > > +#include "intel_gt_pm.h"
> > > #include "intel_ring_types.h"
> > > #include "intel_timeline_types.h"
> > > #include "i915_trace.h"
> > > @@ -207,8 +208,11 @@ void intel_context_exit_engine(struct intel_context *ce);
> > > static inline void intel_context_enter(struct intel_context *ce)
> > > {
> > > lockdep_assert_held(&ce->timeline->mutex);
> > > - if (!ce->active_count++)
> > > - ce->ops->enter(ce);
> > > + if (ce->active_count++)
> > > + return;
> > > +
> > > + ce->ops->enter(ce);
> > > + intel_gt_pm_get(ce->vm->gt);
> > > }
> > > static inline void intel_context_mark_active(struct intel_context *ce)
> > > @@ -222,8 +226,11 @@ static inline void intel_context_exit(struct intel_context *ce)
> > > {
> > > lockdep_assert_held(&ce->timeline->mutex);
> > > GEM_BUG_ON(!ce->active_count);
> > > - if (!--ce->active_count)
> > > - ce->ops->exit(ce);
> > > + if (--ce->active_count)
> > > + return;
> > > +
> > > + intel_gt_pm_put_async(ce->vm->gt);
> > > + ce->ops->exit(ce);
> >
> > shouldn't these two be swapped?
maybe I wasn't clear here... shouldn't it be
ce->ops->exit(ce);
intel_gt_pm_put_async(ce->vm->gt);
Don't we need to hold the pm until exiting?
> > > }
> > > static inline struct intel_context *intel_context_get(struct intel_context *ce)
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> > > index e971b153fda976..ee531a5c142c77 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> > > @@ -114,6 +114,15 @@ __queue_and_release_pm(struct i915_request *rq,
> > > ENGINE_TRACE(engine, "parking\n");
> > > + /*
> > > + * Open coded one half of intel_context_enter, which we have to omit
> > > + * here (see the large comment below) and because the other part must
> > > + * not be called due constructing directly with __i915_request_create
> > > + * which increments active count via intel_context_mark_active.
> > > + */
> > > + GEM_BUG_ON(rq->context->active_count != 1);
> > > + __intel_gt_pm_get(engine->gt);
> >
> > where is it's brother "put"?
>
> It's in request retire via intel_context_exit. Ie. request construction is
> special here, while retirement is standard.
Thank you!
Andi
On 04/04/2023 16:39, Andi Shyti wrote:
> Hi Andrzej,
>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
>> index 0a8d553da3f439..48f888c3da083b 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
>> @@ -14,6 +14,7 @@
>> #include "i915_drv.h"
>> #include "intel_context_types.h"
>> #include "intel_engine_types.h"
>> +#include "intel_gt_pm.h"
>> #include "intel_ring_types.h"
>> #include "intel_timeline_types.h"
>> #include "i915_trace.h"
>> @@ -207,8 +208,11 @@ void intel_context_exit_engine(struct intel_context *ce);
>> static inline void intel_context_enter(struct intel_context *ce)
>> {
>> lockdep_assert_held(&ce->timeline->mutex);
>> - if (!ce->active_count++)
>> - ce->ops->enter(ce);
>> + if (ce->active_count++)
>> + return;
>> +
>> + ce->ops->enter(ce);
>> + intel_gt_pm_get(ce->vm->gt);
>> }
>>
>> static inline void intel_context_mark_active(struct intel_context *ce)
>> @@ -222,8 +226,11 @@ static inline void intel_context_exit(struct intel_context *ce)
>> {
>> lockdep_assert_held(&ce->timeline->mutex);
>> GEM_BUG_ON(!ce->active_count);
>> - if (!--ce->active_count)
>> - ce->ops->exit(ce);
>> + if (--ce->active_count)
>> + return;
>> +
>> + intel_gt_pm_put_async(ce->vm->gt);
>> + ce->ops->exit(ce);
>
> shouldn't these two be swapped?
>
>> }
>>
>> static inline struct intel_context *intel_context_get(struct intel_context *ce)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>> index e971b153fda976..ee531a5c142c77 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>> @@ -114,6 +114,15 @@ __queue_and_release_pm(struct i915_request *rq,
>>
>> ENGINE_TRACE(engine, "parking\n");
>>
>> + /*
>> + * Open coded one half of intel_context_enter, which we have to omit
>> + * here (see the large comment below) and because the other part must
>> + * not be called due constructing directly with __i915_request_create
>> + * which increments active count via intel_context_mark_active.
>> + */
>> + GEM_BUG_ON(rq->context->active_count != 1);
>> + __intel_gt_pm_get(engine->gt);
>
> where is it's brother "put"?
It's in request retire via intel_context_exit. Ie. request construction
is special here, while retirement is standard.
Regards,
Tvrtko
>
> Thanks,
> Andi
>
>> +
>> /*
>> * We have to serialise all potential retirement paths with our
>> * submission, as we don't want to underflow either the
>>
>> ---
>> base-commit: 3385d6482cd60f2a0bbb0fa97b70ae7dbba4f95c
>> change-id: 20230330-hold_wakeref_for_active_vm-7f013a449ef3
>>
>> Best regards,
>> --
>> Andrzej Hajda <[email protected]>
On 04/04/2023 17:00, Andi Shyti wrote:
> Hi Tvrtko,
>
>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
>>>> index 0a8d553da3f439..48f888c3da083b 100644
>>>> --- a/drivers/gpu/drm/i915/gt/intel_context.h
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
>>>> @@ -14,6 +14,7 @@
>>>> #include "i915_drv.h"
>>>> #include "intel_context_types.h"
>>>> #include "intel_engine_types.h"
>>>> +#include "intel_gt_pm.h"
>>>> #include "intel_ring_types.h"
>>>> #include "intel_timeline_types.h"
>>>> #include "i915_trace.h"
>>>> @@ -207,8 +208,11 @@ void intel_context_exit_engine(struct intel_context *ce);
>>>> static inline void intel_context_enter(struct intel_context *ce)
>>>> {
>>>> lockdep_assert_held(&ce->timeline->mutex);
>>>> - if (!ce->active_count++)
>>>> - ce->ops->enter(ce);
>>>> + if (ce->active_count++)
>>>> + return;
>>>> +
>>>> + ce->ops->enter(ce);
>>>> + intel_gt_pm_get(ce->vm->gt);
>>>> }
>>>> static inline void intel_context_mark_active(struct intel_context *ce)
>>>> @@ -222,8 +226,11 @@ static inline void intel_context_exit(struct intel_context *ce)
>>>> {
>>>> lockdep_assert_held(&ce->timeline->mutex);
>>>> GEM_BUG_ON(!ce->active_count);
>>>> - if (!--ce->active_count)
>>>> - ce->ops->exit(ce);
>>>> + if (--ce->active_count)
>>>> + return;
>>>> +
>>>> + intel_gt_pm_put_async(ce->vm->gt);
>>>> + ce->ops->exit(ce);
>>>
>>> shouldn't these two be swapped?
>
> maybe I wasn't clear here... shouldn't it be
I missed this one.
> ce->ops->exit(ce);
> intel_gt_pm_put_async(ce->vm->gt);
>
> Don't we need to hold the pm until exiting?
I think it doesn't matter. The problematic edge case this is fixing is
when ce->engine->gt is different from ce->vm->gt but at this point if it
is safe to release one it must be safe to release the other too.
Regards,
Tvrtko
>
>>>> }
>>>> static inline struct intel_context *intel_context_get(struct intel_context *ce)
>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>>>> index e971b153fda976..ee531a5c142c77 100644
>>>> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>>>> @@ -114,6 +114,15 @@ __queue_and_release_pm(struct i915_request *rq,
>>>> ENGINE_TRACE(engine, "parking\n");
>>>> + /*
>>>> + * Open coded one half of intel_context_enter, which we have to omit
>>>> + * here (see the large comment below) and because the other part must
>>>> + * not be called due constructing directly with __i915_request_create
>>>> + * which increments active count via intel_context_mark_active.
>>>> + */
>>>> + GEM_BUG_ON(rq->context->active_count != 1);
>>>> + __intel_gt_pm_get(engine->gt);
>>>
>>> where is it's brother "put"?
>>
>> It's in request retire via intel_context_exit. Ie. request construction is
>> special here, while retirement is standard.
>
> Thank you!
> Andi
Hi,
On Fri, Mar 31, 2023 at 04:16:36PM +0200, Andrzej Hajda wrote:
> From: Chris Wilson <[email protected]>
>
> There may be a disconnect between the GT used by the engine and the GT
> used for the VM, requiring us to hold a wakeref on both while the GPU is
> active with this request.
>
> v2: added explanation to __queue_and_release_pm
>
> Signed-off-by: Chris Wilson <[email protected]>
> [ahajda: removed not-yet-upstremed wakeref tracking bits]
> Signed-off-by: Andrzej Hajda <[email protected]>
Thank you Tvrtko and Chris for answering my questions,
Reviewed-by: Andi Shyti <[email protected]>
Andi
On 31.03.2023 16:16, Andrzej Hajda wrote:
> From: Chris Wilson <[email protected]>
>
> There may be a disconnect between the GT used by the engine and the GT
> used for the VM, requiring us to hold a wakeref on both while the GPU is
> active with this request.
>
> v2: added explanation to __queue_and_release_pm
>
> Signed-off-by: Chris Wilson <[email protected]>
> [ahajda: removed not-yet-upstremed wakeref tracking bits]
> Signed-off-by: Andrzej Hajda <[email protected]>
Queued.
Regards
Andrzej