Found two separate spots where i915 was throwing "sleeping
function called from invalid context" when running on a
PREEMPT_RT kernel. In both cases it was from calling
local_irq_disable prior to taking a spin_lock. Since spin
locks are converted to rt_mutex_t on PREEMPT_RT this means
that we might sleep with interrupts disabled.
Since in both cases the calls were in threaded context on RT
(irq or ksoftirqd) and in no danger of reentrance, change the
code to only disable interrupts on non-PREEMPT_RT kernels.
Signed-off-by: Clark Williams <[email protected]>
---
drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 6 ++++--
drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 6 ++++--
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 38cc42783dfb..b8bf8d6d3c61 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -318,9 +318,11 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
/* Kick the work once more to drain the signalers, and disarm the irq */
irq_work_sync(&b->irq_work);
while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
signal_irq_work(&b->irq_work);
- local_irq_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable();
cond_resched();
}
}
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index fc77592d88a9..0e918831b69f 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1580,9 +1580,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
static void execlists_dequeue_irq(struct intel_engine_cs *engine)
{
- local_irq_disable(); /* Suspend interrupts across request submission */
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable(); /* Suspend interrupts across request submission */
execlists_dequeue(engine);
- local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
}
static void clear_ports(struct i915_request **ports, int count)
--
2.31.1
--
The United States Coast Guard
Ruining Natural Selection since 1790
On 2021-08-23 15:00:15 [-0500], Clark Williams wrote:
> Found two separate spots where i915 was throwing "sleeping
> function called from invalid context" when running on a
> PREEMPT_RT kernel. In both cases it was from calling
> local_irq_disable prior to taking a spin_lock. Since spin
> locks are converted to rt_mutex_t on PREEMPT_RT this means
> that we might sleep with interrupts disabled.
>
> Since in both cases the calls were in threaded context on RT
> (irq or ksoftirqd) and in no danger of reentrance, change the
> code to only disable interrupts on non-PREEMPT_RT kernels.
>
> Signed-off-by: Clark Williams <[email protected]>
> ---
> drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 6 ++++--
> drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 6 ++++--
> 2 files changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index 38cc42783dfb..b8bf8d6d3c61 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -318,9 +318,11 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
> /* Kick the work once more to drain the signalers, and disarm the irq */
> irq_work_sync(&b->irq_work);
> while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
> - local_irq_disable();
> + if (!IS_ENABLED(CONFIG_PREEMPT_RT))
> + local_irq_disable();
> signal_irq_work(&b->irq_work);
> - local_irq_enable();
> + if (!IS_ENABLED(CONFIG_PREEMPT_RT))
> + local_irq_enable();
wouldn't it work to use irq_work_queue() + sync() instead of invoking
the target callback itself? Given that this context is IRQ-enabled then
it should (at least on x86) trigger right away.
> cond_resched();
> }
> }
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index fc77592d88a9..0e918831b69f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -1580,9 +1580,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>
> static void execlists_dequeue_irq(struct intel_engine_cs *engine)
> {
> - local_irq_disable(); /* Suspend interrupts across request submission */
> + if (!IS_ENABLED(CONFIG_PREEMPT_RT))
> + local_irq_disable(); /* Suspend interrupts across request submission */
> execlists_dequeue(engine);
I've been staring at this for a while. Wouldn't it work in invoke
execlists_dequeue() and let execlists_dequeue() do
spin_lock_irq(&engine->active.lock);
? This is the only invocation of the function. I don't know what the
expected synchronisation behaviour is. The only thing that could break
is the tail part of the function after the &engine->active.lock has been
dropped.
> - local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
> + if (!IS_ENABLED(CONFIG_PREEMPT_RT))
> + local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
> }
>
> static void clear_ports(struct i915_request **ports, int count)
Sebastian
On 2021-08-23 15:00:15 [-0500], Clark Williams wrote:
> Found two separate spots where i915 was throwing "sleeping
> function called from invalid context" when running on a
> PREEMPT_RT kernel. In both cases it was from calling
> local_irq_disable prior to taking a spin_lock. Since spin
> locks are converted to rt_mutex_t on PREEMPT_RT this means
> that we might sleep with interrupts disabled.
>
> Since in both cases the calls were in threaded context on RT
> (irq or ksoftirqd) and in no danger of reentrance, change the
> code to only disable interrupts on non-PREEMPT_RT kernels.
>
> Signed-off-by: Clark Williams <[email protected]>
I try to deal with this in
https://lkml.kernel.org/r/[email protected]
Sebastian