2022-07-07 16:14:33

by Akhil P Oommen

[permalink] [raw]
Subject: [PATCH 0/7] Improve GPU Recovery


Recently, I have debugged a few device crashes which occured during
a recovery after a hangcheck timeout. It looks like there are a few
things we can do to improve our chance at a successful gpu recovery.

First one is to ensure that CX GDSC collapses which clears the internal
states in gpu's CX domain. First 5 patches tries to handle this.

Rest of the patches are to ensure that few internal blocks like CP, GMU
and GBIF are halted properly before proceeding for a snapshot followed by
recovery. Also, handle 'prepare slumber' hfi failure correctly. These
are A6x specific improvements.


Akhil P Oommen (7):
drm/msm: Remove unnecessary pm_runtime_get/put
drm/msm: Correct pm_runtime votes in recover worker
drm/msm: Fix cx collapse issue during recovery
drm/msm: Ensure cx gdsc collapse during recovery
arm64: dts: qcom: sc7280: Update gpu register list
drm/msm/a6xx: Improve gpu recovery sequence
drm/msm/a6xx: Handle GMU prepare-slumber hfi failure

arch/arm64/boot/dts/qcom/sc7280.dtsi | 6 ++-
drivers/gpu/drm/msm/adreno/a6xx.xml.h | 4 ++
drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 83 ++++++++++++++++++++++-------------
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 36 +++++++++++++--
drivers/gpu/drm/msm/msm_gpu.c | 9 ++--
drivers/gpu/drm/msm/msm_gpu.h | 1 +
drivers/gpu/drm/msm/msm_ringbuffer.c | 4 --
7 files changed, 100 insertions(+), 43 deletions(-)

--
2.7.4


2022-07-07 16:14:35

by Akhil P Oommen

[permalink] [raw]
Subject: [PATCH 1/7] drm/msm: Remove unnecessary pm_runtime_get/put

We already enable gpu power from msm_gpu_submit(), so avoid a duplicate
pm_runtime_get/put from msm_job_run().

Signed-off-by: Akhil P Oommen <[email protected]>
---

drivers/gpu/drm/msm/msm_ringbuffer.c | 4 ----
1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 4306632..82bee84 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -29,8 +29,6 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
msm_gem_unlock(obj);
}

- pm_runtime_get_sync(&gpu->pdev->dev);
-
/* TODO move submit path over to using a per-ring lock.. */
mutex_lock(&gpu->lock);

@@ -38,8 +36,6 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)

mutex_unlock(&gpu->lock);

- pm_runtime_put(&gpu->pdev->dev);
-
return dma_fence_get(submit->hw_fence);
}

--
2.7.4

2022-07-07 16:26:09

by Akhil P Oommen

[permalink] [raw]
Subject: [PATCH 2/7] drm/msm: Correct pm_runtime votes in recover worker

In the scenario where there is one a single submit which is hung, gpu is
power collapsed when it is retired. Because of this, by the time we call
reover(), gpu state would be already clear. Fix this by correctly
managing the pm runtime votes.

Signed-off-by: Akhil P Oommen <[email protected]>
---

drivers/gpu/drm/msm/msm_gpu.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index eb8a666..f75ff4b 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -416,7 +416,6 @@ static void recover_worker(struct kthread_work *work)
/* Record the crash state */
pm_runtime_get_sync(&gpu->pdev->dev);
msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
- pm_runtime_put_sync(&gpu->pdev->dev);

kfree(cmd);
kfree(comm);
@@ -464,6 +463,8 @@ static void recover_worker(struct kthread_work *work)
}
}

+ pm_runtime_put_sync(&gpu->pdev->dev);
+
mutex_unlock(&gpu->lock);

msm_gpu_retire(gpu);
--
2.7.4

2022-07-07 16:38:09

by Akhil P Oommen

[permalink] [raw]
Subject: [PATCH 4/7] drm/msm: Ensure cx gdsc collapse during recovery

To improve our chance of a successful recovery, we should ensure that
cx headswitch collapses. Cx headswitch might be kept enabled through a
vote from another driver like iommu or even another hardware subsystem.
So, poll the cx gdscr register to ensure that it collapses during
recovery.

Signed-off-by: Akhil P Oommen <[email protected]>
---

drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 13 ++++++++++++-
drivers/gpu/drm/msm/msm_gpu.c | 4 ++++
drivers/gpu/drm/msm/msm_gpu.h | 1 +
3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 57a7ad5..e956a13 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1189,11 +1189,15 @@ static void a6xx_dump(struct msm_gpu *gpu)
#define VBIF_RESET_ACK_TIMEOUT 100
#define VBIF_RESET_ACK_MASK 0x00f0

+#define CX_GDSCR_OFFSET 0x106c
+#define CX_GDSC_ON_MASK BIT(31)
+
static void a6xx_recover(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
- int i;
+ int i, ret;
+ u32 val;

adreno_dump_info(gpu);

@@ -1220,6 +1224,13 @@ static void a6xx_recover(struct msm_gpu *gpu)
/* And the final one from recover worker */
pm_runtime_put_sync(&gpu->pdev->dev);

+ if (gpu->gpucc_io) {
+ ret = readl_poll_timeout(gpu->gpucc_io + CX_GDSCR_OFFSET, val,
+ !(val & CX_GDSC_ON_MASK), 100, 500000);
+ if (ret)
+ DRM_DEV_INFO(&gpu->pdev->dev, "cx gdsc didn't collapse\n");
+ }
+
for (i = gpu->active_submits; i > 0; i--)
pm_runtime_get(&gpu->pdev->dev);

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 48171b6..29ee615 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -881,6 +881,10 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
goto fail;
}

+ gpu->gpucc_io = msm_ioremap(pdev, "gpucc");
+ if (IS_ERR(gpu->gpucc_io))
+ gpu->gpucc_io = NULL;
+
/* Get Interrupt: */
gpu->irq = platform_get_irq(pdev, 0);
if (gpu->irq < 0) {
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 6def008..07578778 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -217,6 +217,7 @@ struct msm_gpu {
int global_faults;

void __iomem *mmio;
+ void __iomem *gpucc_io;
int irq;

struct msm_gem_address_space *aspace;
--
2.7.4

2022-07-07 16:38:29

by Akhil P Oommen

[permalink] [raw]
Subject: [PATCH 7/7] drm/msm/a6xx: Handle GMU prepare-slumber hfi failure

When prepare-slumber hfi fails, we should follow a6xx_gmu_force_off()
sequence.

Signed-off-by: Akhil P Oommen <[email protected]>
---

drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index db05942..3d00ef9 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1082,7 +1082,11 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
a6xx_bus_clear_pending_transactions(adreno_gpu);

/* tell the GMU we want to slumber */
- a6xx_gmu_notify_slumber(gmu);
+ ret = a6xx_gmu_notify_slumber(gmu);
+ if (ret) {
+ a6xx_gmu_force_off(gmu);
+ return;
+ }

ret = gmu_poll_timeout(gmu,
REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val,
--
2.7.4

2022-07-07 16:53:25

by Akhil P Oommen

[permalink] [raw]
Subject: [PATCH 5/7] arm64: dts: qcom: sc7280: Update gpu register list

Update gpu register array with gpucc memory region.

Signed-off-by: Akhil P Oommen <[email protected]>
---

arch/arm64/boot/dts/qcom/sc7280.dtsi | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi
index e66fc67..defdb25 100644
--- a/arch/arm64/boot/dts/qcom/sc7280.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi
@@ -2228,10 +2228,12 @@
compatible = "qcom,adreno-635.0", "qcom,adreno";
reg = <0 0x03d00000 0 0x40000>,
<0 0x03d9e000 0 0x1000>,
- <0 0x03d61000 0 0x800>;
+ <0 0x03d61000 0 0x800>,
+ <0 0x03d90000 0 0x2000>;
reg-names = "kgsl_3d0_reg_memory",
"cx_mem",
- "cx_dbgc";
+ "cx_dbgc",
+ "gpucc";
interrupts = <GIC_SPI 300 IRQ_TYPE_LEVEL_HIGH>;
iommus = <&adreno_smmu 0 0x401>;
operating-points-v2 = <&gpu_opp_table>;
--
2.7.4

2022-07-07 16:57:44

by Akhil P Oommen

[permalink] [raw]
Subject: [PATCH 3/7] drm/msm: Fix cx collapse issue during recovery

There are some hardware logic under CX domain. For a successful
recovery, we should ensure cx headswitch collapses to ensure all the
stale states are cleard out. This is especially true to for a6xx family
where we can GMU co-processor.

Currently, cx doesn't collapse due to a devlink between gpu and its
smmu. So the *struct gpu device* needs to be runtime suspended to ensure
that the iommu driver removes its vote on cx gdsc.

Signed-off-by: Akhil P Oommen <[email protected]>
---

drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 16 ++++++++++++++--
drivers/gpu/drm/msm/msm_gpu.c | 2 --
2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 42ed9a3..57a7ad5 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1210,8 +1210,20 @@ static void a6xx_recover(struct msm_gpu *gpu)
*/
gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 0);

- gpu->funcs->pm_suspend(gpu);
- gpu->funcs->pm_resume(gpu);
+ /*
+ * Now drop all the pm_runtime usage count to allow cx gdsc to collapse.
+ * First drop the usage count from all active submits
+ */
+ for (i = gpu->active_submits; i > 0; i--)
+ pm_runtime_put(&gpu->pdev->dev);
+
+ /* And the final one from recover worker */
+ pm_runtime_put_sync(&gpu->pdev->dev);
+
+ for (i = gpu->active_submits; i > 0; i--)
+ pm_runtime_get(&gpu->pdev->dev);
+
+ pm_runtime_get_sync(&gpu->pdev->dev);

msm_gpu_hw_init(gpu);
}
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index f75ff4b..48171b6 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -444,9 +444,7 @@ static void recover_worker(struct kthread_work *work)
/* retire completed submits, plus the one that hung: */
retire_submits(gpu);

- pm_runtime_get_sync(&gpu->pdev->dev);
gpu->funcs->recover(gpu);
- pm_runtime_put_sync(&gpu->pdev->dev);

/*
* Replay all remaining submits starting with highest priority
--
2.7.4

2022-07-07 16:58:13

by Akhil P Oommen

[permalink] [raw]
Subject: [PATCH 6/7] drm/msm/a6xx: Improve gpu recovery sequence

We can do a few more things to improve our chance at a successful gpu
recovery, especially during a hangcheck timeout:
1. Halt CP and GMU core
2. Do RBBM GBIF HALT sequence
3. Do a soft reset of GPU core

Signed-off-by: Akhil P Oommen <[email protected]>
---

drivers/gpu/drm/msm/adreno/a6xx.xml.h | 4 ++
drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 77 +++++++++++++++++++++--------------
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 7 ++++
3 files changed, 58 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx.xml.h b/drivers/gpu/drm/msm/adreno/a6xx.xml.h
index b03e2c4..beea4a7 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx.xml.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx.xml.h
@@ -1413,6 +1413,10 @@ static inline uint32_t REG_A6XX_RBBM_PERFCTR_RBBM_SEL(uint32_t i0) { return 0x00

#define REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL 0x00000011

+#define REG_A6XX_RBBM_GBIF_HALT 0x00000016
+
+#define REG_A6XX_RBBM_GBIF_HALT_ACK 0x00000017
+
#define REG_A6XX_RBBM_WAIT_FOR_GPU_IDLE_CMD 0x0000001c
#define A6XX_RBBM_WAIT_FOR_GPU_IDLE_CMD_WAIT_GPU_IDLE 0x00000001

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 9f76f5b..db05942 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -869,9 +869,47 @@ static void a6xx_gmu_rpmh_off(struct a6xx_gmu *gmu)
(val & 1), 100, 1000);
}

+#define GBIF_CLIENT_HALT_MASK BIT(0)
+#define GBIF_ARB_HALT_MASK BIT(1)
+
+static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu)
+{
+ struct msm_gpu *gpu = &adreno_gpu->base;
+
+ if (!a6xx_has_gbif(adreno_gpu)) {
+ gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf);
+ spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
+ 0xf) == 0xf);
+ gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
+
+ return;
+ }
+
+ /* Halt the gx side of GBIF */
+ gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1);
+ spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1);
+
+ /* Halt new client requests on GBIF */
+ gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
+ spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
+ (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
+
+ /* Halt all AXI requests on GBIF */
+ gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
+ spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
+ (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
+
+ /* The GBIF halt needs to be explicitly cleared */
+ gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
+}
+
/* Force the GMU off in case it isn't responsive */
static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
{
+ struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+ struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+ struct msm_gpu *gpu = &adreno_gpu->base;
+
/* Flush all the queues */
a6xx_hfi_stop(gmu);

@@ -883,6 +921,15 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)

/* Make sure there are no outstanding RPMh votes */
a6xx_gmu_rpmh_off(gmu);
+
+ /* Halt the gmu cm3 core */
+ gmu_write(gmu, REG_A6XX_GMU_CM3_SYSRESET, 1);
+
+ a6xx_bus_clear_pending_transactions(adreno_gpu);
+
+ /* Reset GPU core blocks */
+ gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, 1);
+ udelay(100);
}

static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu)
@@ -1010,36 +1057,6 @@ bool a6xx_gmu_isidle(struct a6xx_gmu *gmu)
return true;
}

-#define GBIF_CLIENT_HALT_MASK BIT(0)
-#define GBIF_ARB_HALT_MASK BIT(1)
-
-static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu)
-{
- struct msm_gpu *gpu = &adreno_gpu->base;
-
- if (!a6xx_has_gbif(adreno_gpu)) {
- gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf);
- spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
- 0xf) == 0xf);
- gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
-
- return;
- }
-
- /* Halt new client requests on GBIF */
- gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
- spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
- (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
-
- /* Halt all AXI requests on GBIF */
- gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
- spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
- (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
-
- /* The GBIF halt needs to be explicitly cleared */
- gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
-}
-
/* Gracefully try to shut down the GMU and by extension the GPU */
static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
{
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index e956a13..719e419 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -919,6 +919,10 @@ static int hw_init(struct msm_gpu *gpu)
/* Make sure the GMU keeps the GPU on while we set it up */
a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

+ /* Clear GBIF halt in case GX domain was not collapsed */
+ if (a6xx_has_gbif(adreno_gpu))
+ gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
+
gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);

/*
@@ -1208,6 +1212,9 @@ static void a6xx_recover(struct msm_gpu *gpu)
if (hang_debug)
a6xx_dump(gpu);

+ /* Halt SQE first */
+ gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
+
/*
* Turn off keep alive that might have been enabled by the hang
* interrupt
--
2.7.4

2022-07-07 17:46:32

by Rob Clark

[permalink] [raw]
Subject: Re: [PATCH 3/7] drm/msm: Fix cx collapse issue during recovery

On Thu, Jul 7, 2022 at 9:11 AM Akhil P Oommen <[email protected]> wrote:
>
> There are some hardware logic under CX domain. For a successful
> recovery, we should ensure cx headswitch collapses to ensure all the
> stale states are cleard out. This is especially true to for a6xx family
> where we can GMU co-processor.
>
> Currently, cx doesn't collapse due to a devlink between gpu and its
> smmu. So the *struct gpu device* needs to be runtime suspended to ensure
> that the iommu driver removes its vote on cx gdsc.
>
> Signed-off-by: Akhil P Oommen <[email protected]>
> ---
>
> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 16 ++++++++++++++--
> drivers/gpu/drm/msm/msm_gpu.c | 2 --
> 2 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 42ed9a3..57a7ad5 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -1210,8 +1210,20 @@ static void a6xx_recover(struct msm_gpu *gpu)
> */
> gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 0);
>
> - gpu->funcs->pm_suspend(gpu);
> - gpu->funcs->pm_resume(gpu);
> + /*
> + * Now drop all the pm_runtime usage count to allow cx gdsc to collapse.
> + * First drop the usage count from all active submits
> + */
> + for (i = gpu->active_submits; i > 0; i--)
> + pm_runtime_put(&gpu->pdev->dev);

Would pm_runtime_force_suspend/resume() work instead?

BR,
-R

> +
> + /* And the final one from recover worker */
> + pm_runtime_put_sync(&gpu->pdev->dev);
> +
> + for (i = gpu->active_submits; i > 0; i--)
> + pm_runtime_get(&gpu->pdev->dev);
> +
> + pm_runtime_get_sync(&gpu->pdev->dev);
>
> msm_gpu_hw_init(gpu);
> }
> diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> index f75ff4b..48171b6 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.c
> +++ b/drivers/gpu/drm/msm/msm_gpu.c
> @@ -444,9 +444,7 @@ static void recover_worker(struct kthread_work *work)
> /* retire completed submits, plus the one that hung: */
> retire_submits(gpu);
>
> - pm_runtime_get_sync(&gpu->pdev->dev);
> gpu->funcs->recover(gpu);
> - pm_runtime_put_sync(&gpu->pdev->dev);
>
> /*
> * Replay all remaining submits starting with highest priority
> --
> 2.7.4
>