2024-02-25 14:13:06

by Dmitry Baryshkov

[permalink] [raw]
Subject: [PATCH v3 0/3] drm/msm/dpu: debug commit_done timeouts

In order to debug commit_done timeouts ([1]) display the sticky bits of
the CTL_FLUSH register and capture the devcore dump when the first such
timeout occurs.

[1] https://gitlab.freedesktop.org/drm/msm/-/issues/33

Signed-off-by: Dmitry Baryshkov <[email protected]>
---
Changes in v3:
- Capture the snapshot only on the first comit_done timeout (Abhinav)
- Link to v2: https://lore.kernel.org/r/[email protected]

Changes in v2:
- Added a call to msm_disp_snapshot_state() to trigger devcore dump
(Abhinav)
- Link to v1: https://lore.kernel.org/r/[email protected]

---
Dmitry Baryshkov (3):
drm/msm/dpu: make "vblank timeout" more useful
drm/msm/dpu: split dpu_encoder_wait_for_event into two functions
drm/msm/dpu: capture snapshot on the first commit_done timeout

drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 83 +++++++++++++++++-----
drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h | 22 +-----
.../gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 2 +-
drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 2 +-
drivers/gpu/drm/msm/msm_drv.h | 10 ---
5 files changed, 69 insertions(+), 50 deletions(-)
---
base-commit: 33e1d31873f87d119e5120b88cd350efa68ef276
change-id: 20240106-fd-dpu-debug-timeout-e917f0bc8063

Best regards,
--
Dmitry Baryshkov <[email protected]>



2024-02-25 14:13:11

by Dmitry Baryshkov

[permalink] [raw]
Subject: [PATCH v3 1/3] drm/msm/dpu: make "vblank timeout" more useful

We have several reports of vblank timeout messages. However after some
debugging it was found that there might be different causes to that.
To allow us to identify the DPU block that gets stuck, include the
actual CTL_FLUSH value into the timeout message.

Signed-off-by: Dmitry Baryshkov <[email protected]>
---
drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
index 2aa72b578764..6058706f03e4 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
@@ -480,7 +480,7 @@ static int dpu_encoder_phys_vid_wait_for_commit_done(
(hw_ctl->ops.get_flush_register(hw_ctl) == 0),
msecs_to_jiffies(50));
if (ret <= 0) {
- DPU_ERROR("vblank timeout\n");
+ DPU_ERROR("vblank timeout: %x\n", hw_ctl->ops.get_flush_register(hw_ctl));
return -ETIMEDOUT;
}


--
2.39.2


2024-02-25 14:13:33

by Dmitry Baryshkov

[permalink] [raw]
Subject: [PATCH v3 2/3] drm/msm/dpu: split dpu_encoder_wait_for_event into two functions

Stop multiplexing several events via the dpu_encoder_wait_for_event()
function. Split it into two distinct functions two allow separate
handling of those events.

Signed-off-by: Dmitry Baryshkov <[email protected]>
---
drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 74 +++++++++++++++++++++--------
drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h | 22 ++-------
drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 2 +-
drivers/gpu/drm/msm/msm_drv.h | 10 ----
4 files changed, 59 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 194dbb08331d..30f349c8a1e5 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -1282,7 +1282,7 @@ static void dpu_encoder_virt_atomic_disable(struct drm_encoder *drm_enc,
trace_dpu_enc_disable(DRMID(drm_enc));

/* wait for idle */
- dpu_encoder_wait_for_event(drm_enc, MSM_ENC_TX_COMPLETE);
+ dpu_encoder_wait_for_tx_complete(drm_enc);

dpu_encoder_resource_control(drm_enc, DPU_ENC_RC_EVENT_PRE_STOP);

@@ -2402,10 +2402,23 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
return &dpu_enc->base;
}

-int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
- enum msm_event_wait event)
+/**
+ * dpu_encoder_wait_for_commit_done() - Wait for encoder to flush pending state
+ * @drm_enc: encoder pointer
+ *
+ * Wait for hardware to have flushed the current pending frames to hardware at
+ * a vblank or ctl_start Encoders will map this differently depending on the
+ * panel type.
+ *
+ * MSM_ENC_TX_COMPLETE - Wait for the hardware to transfer all the pixels to
+ * the panel. Encoders will map this differently
+ * depending on the panel type.
+ * vid mode -> vsync_irq
+ * cmd mode -> pp_done
+ * Return: 0 on success, -EWOULDBLOCK if already signaled, error otherwise
+ */
+int dpu_encoder_wait_for_commit_done(struct drm_encoder *drm_enc)
{
- int (*fn_wait)(struct dpu_encoder_phys *phys_enc) = NULL;
struct dpu_encoder_virt *dpu_enc = NULL;
int i, ret = 0;

@@ -2419,23 +2432,46 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
for (i = 0; i < dpu_enc->num_phys_encs; i++) {
struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];

- switch (event) {
- case MSM_ENC_COMMIT_DONE:
- fn_wait = phys->ops.wait_for_commit_done;
- break;
- case MSM_ENC_TX_COMPLETE:
- fn_wait = phys->ops.wait_for_tx_complete;
- break;
- default:
- DPU_ERROR_ENC(dpu_enc, "unknown wait event %d\n",
- event);
- return -EINVAL;
+ if (phys->ops.wait_for_commit_done) {
+ DPU_ATRACE_BEGIN("wait_for_commit_done");
+ ret = phys->ops.wait_for_commit_done(phys);
+ DPU_ATRACE_END("wait_for_commit_done");
+ if (ret)
+ return ret;
}
+ }
+
+ return ret;
+}
+
+/**
+ * dpu_encoder_wait_for_tx_complete() - Wait for encoder to transfer pixels to panel
+ * @drm_enc: encoder pointer
+ *
+ * Wait for the hardware to transfer all the pixels to the panel. Encoders will
+ * map this differently depending on the panel type.
+ *
+ * Return: 0 on success, -EWOULDBLOCK if already signaled, error otherwise
+ */
+int dpu_encoder_wait_for_tx_complete(struct drm_encoder *drm_enc)
+{
+ struct dpu_encoder_virt *dpu_enc = NULL;
+ int i, ret = 0;
+
+ if (!drm_enc) {
+ DPU_ERROR("invalid encoder\n");
+ return -EINVAL;
+ }
+ dpu_enc = to_dpu_encoder_virt(drm_enc);
+ DPU_DEBUG_ENC(dpu_enc, "\n");
+
+ for (i = 0; i < dpu_enc->num_phys_encs; i++) {
+ struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];

- if (fn_wait) {
- DPU_ATRACE_BEGIN("wait_for_completion_event");
- ret = fn_wait(phys);
- DPU_ATRACE_END("wait_for_completion_event");
+ if (phys->ops.wait_for_tx_complete) {
+ DPU_ATRACE_BEGIN("wait_for_tx_complete");
+ ret = phys->ops.wait_for_tx_complete(phys);
+ DPU_ATRACE_END("wait_for_tx_complete");
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
index fe6b1d312a74..0c928d1876e4 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
@@ -93,25 +93,9 @@ void dpu_encoder_kickoff(struct drm_encoder *encoder);
*/
int dpu_encoder_vsync_time(struct drm_encoder *drm_enc, ktime_t *wakeup_time);

-/**
- * dpu_encoder_wait_for_event - Waits for encoder events
- * @encoder: encoder pointer
- * @event: event to wait for
- * MSM_ENC_COMMIT_DONE - Wait for hardware to have flushed the current pending
- * frames to hardware at a vblank or ctl_start
- * Encoders will map this differently depending on the
- * panel type.
- * vid mode -> vsync_irq
- * cmd mode -> ctl_start
- * MSM_ENC_TX_COMPLETE - Wait for the hardware to transfer all the pixels to
- * the panel. Encoders will map this differently
- * depending on the panel type.
- * vid mode -> vsync_irq
- * cmd mode -> pp_done
- * Returns: 0 on success, -EWOULDBLOCK if already signaled, error otherwise
- */
-int dpu_encoder_wait_for_event(struct drm_encoder *drm_encoder,
- enum msm_event_wait event);
+int dpu_encoder_wait_for_commit_done(struct drm_encoder *drm_encoder);
+
+int dpu_encoder_wait_for_tx_complete(struct drm_encoder *drm_encoder);

/*
* dpu_encoder_get_intf_mode - get interface mode of the given encoder
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index d6412395bacc..26b5e54031d9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -476,7 +476,7 @@ static void dpu_kms_wait_for_commit_done(struct msm_kms *kms,
* mode panels. This may be a no-op for command mode panels.
*/
trace_dpu_kms_wait_for_commit_done(DRMID(crtc));
- ret = dpu_encoder_wait_for_event(encoder, MSM_ENC_COMMIT_DONE);
+ ret = dpu_encoder_wait_for_commit_done(encoder);
if (ret && ret != -EWOULDBLOCK) {
DPU_ERROR("wait for commit done returned %d\n", ret);
break;
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 762e13e2df74..91cf57f72321 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -74,16 +74,6 @@ enum msm_dsi_controller {
#define MSM_GPU_MAX_RINGS 4
#define MAX_H_TILES_PER_DISPLAY 2

-/**
- * enum msm_event_wait - type of HW events to wait for
- * @MSM_ENC_COMMIT_DONE - wait for the driver to flush the registers to HW
- * @MSM_ENC_TX_COMPLETE - wait for the HW to transfer the frame to panel
- */
-enum msm_event_wait {
- MSM_ENC_COMMIT_DONE = 0,
- MSM_ENC_TX_COMPLETE,
-};
-
/**
* struct msm_display_topology - defines a display topology pipeline
* @num_lm: number of layer mixers used

--
2.39.2


2024-02-25 14:13:46

by Dmitry Baryshkov

[permalink] [raw]
Subject: [PATCH v3 3/3] drm/msm/dpu: capture snapshot on the first commit_done timeout

In order to debug commit_done timeouts, capture the devcoredump state
when the first timeout occurs after the encoder has been enabled.

Signed-off-by: Dmitry Baryshkov <[email protected]>
---
drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 30f349c8a1e5..3cae07bf0b9b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -126,6 +126,8 @@ enum dpu_enc_rc_states {
* @base: drm_encoder base class for registration with DRM
* @enc_spinlock: Virtual-Encoder-Wide Spin Lock for IRQ purposes
* @enabled: True if the encoder is active, protected by enc_lock
+ * @commit_done_timedout: True if there has been a timeout on commit after
+ * enabling the encoder.
* @num_phys_encs: Actual number of physical encoders contained.
* @phys_encs: Container of physical encoders managed.
* @cur_master: Pointer to the current master in this mode. Optimization
@@ -172,6 +174,7 @@ struct dpu_encoder_virt {
spinlock_t enc_spinlock;

bool enabled;
+ bool commit_done_timedout;

unsigned int num_phys_encs;
struct dpu_encoder_phys *phys_encs[MAX_PHYS_ENCODERS_PER_VIRTUAL];
@@ -1226,6 +1229,8 @@ static void dpu_encoder_virt_atomic_enable(struct drm_encoder *drm_enc,
else if (disp_info->intf_type == INTF_DSI)
dpu_enc->wide_bus_en = msm_dsi_wide_bus_enabled(priv->dsi[index]);

+ dpu_enc->commit_done_timedout = false;
+
mutex_lock(&dpu_enc->enc_lock);
cur_mode = &dpu_enc->base.crtc->state->adjusted_mode;

@@ -2436,6 +2441,10 @@ int dpu_encoder_wait_for_commit_done(struct drm_encoder *drm_enc)
DPU_ATRACE_BEGIN("wait_for_commit_done");
ret = phys->ops.wait_for_commit_done(phys);
DPU_ATRACE_END("wait_for_commit_done");
+ if (ret == -ETIMEDOUT && !dpu_enc->commit_done_timedout) {
+ dpu_enc->commit_done_timedout = true;
+ msm_disp_snapshot_state(drm_enc->dev);
+ }
if (ret)
return ret;
}

--
2.39.2


2024-02-25 19:44:27

by Abhinav Kumar

[permalink] [raw]
Subject: Re: [PATCH v3 1/3] drm/msm/dpu: make "vblank timeout" more useful



On 2/25/2024 6:12 AM, Dmitry Baryshkov wrote:
> We have several reports of vblank timeout messages. However after some
> debugging it was found that there might be different causes to that.
> To allow us to identify the DPU block that gets stuck, include the
> actual CTL_FLUSH value into the timeout message.
>

the flush register shall also be part of the coredump in patch 3. so why
is this needed?

> Signed-off-by: Dmitry Baryshkov <[email protected]>
> ---
> drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
> index 2aa72b578764..6058706f03e4 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
> @@ -480,7 +480,7 @@ static int dpu_encoder_phys_vid_wait_for_commit_done(
> (hw_ctl->ops.get_flush_register(hw_ctl) == 0),
> msecs_to_jiffies(50));
> if (ret <= 0) {
> - DPU_ERROR("vblank timeout\n");
> + DPU_ERROR("vblank timeout: %x\n", hw_ctl->ops.get_flush_register(hw_ctl));
> return -ETIMEDOUT;
> }
>
>

2024-02-25 19:49:54

by Abhinav Kumar

[permalink] [raw]
Subject: Re: [PATCH v3 2/3] drm/msm/dpu: split dpu_encoder_wait_for_event into two functions



On 2/25/2024 6:12 AM, Dmitry Baryshkov wrote:
> Stop multiplexing several events via the dpu_encoder_wait_for_event()
> function. Split it into two distinct functions two allow separate
> handling of those events.
>

I understand the idea but would handling of those events be really distinct?

Like if wait_for_commit_done timedout or wait_for_tx_complete timedout,
the handling will be similar from my PoV.

> Signed-off-by: Dmitry Baryshkov <[email protected]>
> ---
> drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 74 +++++++++++++++++++++--------
> drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h | 22 ++-------
> drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 2 +-
> drivers/gpu/drm/msm/msm_drv.h | 10 ----
> 4 files changed, 59 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
> index 194dbb08331d..30f349c8a1e5 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
> @@ -1282,7 +1282,7 @@ static void dpu_encoder_virt_atomic_disable(struct drm_encoder *drm_enc,
> trace_dpu_enc_disable(DRMID(drm_enc));
>
> /* wait for idle */
> - dpu_encoder_wait_for_event(drm_enc, MSM_ENC_TX_COMPLETE);
> + dpu_encoder_wait_for_tx_complete(drm_enc);
>
> dpu_encoder_resource_control(drm_enc, DPU_ENC_RC_EVENT_PRE_STOP);
>
> @@ -2402,10 +2402,23 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
> return &dpu_enc->base;
> }
>
> -int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
> - enum msm_event_wait event)
> +/**
> + * dpu_encoder_wait_for_commit_done() - Wait for encoder to flush pending state
> + * @drm_enc: encoder pointer
> + *
> + * Wait for hardware to have flushed the current pending frames to hardware at
> + * a vblank or ctl_start Encoders will map this differently depending on the
> + * panel type.
> + *

Missing a '.' between ctl_start and Encoder?

> + * MSM_ENC_TX_COMPLETE - Wait for the hardware to transfer all the pixels to
> + * the panel. Encoders will map this differently
> + * depending on the panel type.
> + * vid mode -> vsync_irq
> + * cmd mode -> pp_done
> + * Return: 0 on success, -EWOULDBLOCK if already signaled, error otherwise
> + */
> +int dpu_encoder_wait_for_commit_done(struct drm_encoder *drm_enc)
> {
> - int (*fn_wait)(struct dpu_encoder_phys *phys_enc) = NULL;
> struct dpu_encoder_virt *dpu_enc = NULL;
> int i, ret = 0;
>
> @@ -2419,23 +2432,46 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
> for (i = 0; i < dpu_enc->num_phys_encs; i++) {
> struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
>
> - switch (event) {
> - case MSM_ENC_COMMIT_DONE:
> - fn_wait = phys->ops.wait_for_commit_done;
> - break;
> - case MSM_ENC_TX_COMPLETE:
> - fn_wait = phys->ops.wait_for_tx_complete;
> - break;
> - default:
> - DPU_ERROR_ENC(dpu_enc, "unknown wait event %d\n",
> - event);
> - return -EINVAL;
> + if (phys->ops.wait_for_commit_done) {
> + DPU_ATRACE_BEGIN("wait_for_commit_done");
> + ret = phys->ops.wait_for_commit_done(phys);
> + DPU_ATRACE_END("wait_for_commit_done");
> + if (ret)
> + return ret;
> }
> + }
> +
> + return ret;
> +}
> +
> +/**
> + * dpu_encoder_wait_for_tx_complete() - Wait for encoder to transfer pixels to panel
> + * @drm_enc: encoder pointer
> + *
> + * Wait for the hardware to transfer all the pixels to the panel. Encoders will
> + * map this differently depending on the panel type.
> + *
> + * Return: 0 on success, -EWOULDBLOCK if already signaled, error otherwise
> + */
> +int dpu_encoder_wait_for_tx_complete(struct drm_encoder *drm_enc)
> +{
> + struct dpu_encoder_virt *dpu_enc = NULL;
> + int i, ret = 0;
> +
> + if (!drm_enc) {
> + DPU_ERROR("invalid encoder\n");
> + return -EINVAL;
> + }
> + dpu_enc = to_dpu_encoder_virt(drm_enc);
> + DPU_DEBUG_ENC(dpu_enc, "\n");
> +
> + for (i = 0; i < dpu_enc->num_phys_encs; i++) {
> + struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
>
> - if (fn_wait) {
> - DPU_ATRACE_BEGIN("wait_for_completion_event");
> - ret = fn_wait(phys);
> - DPU_ATRACE_END("wait_for_completion_event");
> + if (phys->ops.wait_for_tx_complete) {
> + DPU_ATRACE_BEGIN("wait_for_tx_complete");
> + ret = phys->ops.wait_for_tx_complete(phys);
> + DPU_ATRACE_END("wait_for_tx_complete");
> if (ret)
> return ret;
> }
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
> index fe6b1d312a74..0c928d1876e4 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
> @@ -93,25 +93,9 @@ void dpu_encoder_kickoff(struct drm_encoder *encoder);
> */
> int dpu_encoder_vsync_time(struct drm_encoder *drm_enc, ktime_t *wakeup_time);
>
> -/**
> - * dpu_encoder_wait_for_event - Waits for encoder events
> - * @encoder: encoder pointer
> - * @event: event to wait for
> - * MSM_ENC_COMMIT_DONE - Wait for hardware to have flushed the current pending
> - * frames to hardware at a vblank or ctl_start
> - * Encoders will map this differently depending on the
> - * panel type.
> - * vid mode -> vsync_irq
> - * cmd mode -> ctl_start
> - * MSM_ENC_TX_COMPLETE - Wait for the hardware to transfer all the pixels to
> - * the panel. Encoders will map this differently
> - * depending on the panel type.
> - * vid mode -> vsync_irq
> - * cmd mode -> pp_done
> - * Returns: 0 on success, -EWOULDBLOCK if already signaled, error otherwise
> - */
> -int dpu_encoder_wait_for_event(struct drm_encoder *drm_encoder,
> - enum msm_event_wait event);
> +int dpu_encoder_wait_for_commit_done(struct drm_encoder *drm_encoder);
> +
> +int dpu_encoder_wait_for_tx_complete(struct drm_encoder *drm_encoder);
>
> /*
> * dpu_encoder_get_intf_mode - get interface mode of the given encoder
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
> index d6412395bacc..26b5e54031d9 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
> @@ -476,7 +476,7 @@ static void dpu_kms_wait_for_commit_done(struct msm_kms *kms,
> * mode panels. This may be a no-op for command mode panels.
> */
> trace_dpu_kms_wait_for_commit_done(DRMID(crtc));
> - ret = dpu_encoder_wait_for_event(encoder, MSM_ENC_COMMIT_DONE);
> + ret = dpu_encoder_wait_for_commit_done(encoder);
> if (ret && ret != -EWOULDBLOCK) {
> DPU_ERROR("wait for commit done returned %d\n", ret);
> break;
> diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
> index 762e13e2df74..91cf57f72321 100644
> --- a/drivers/gpu/drm/msm/msm_drv.h
> +++ b/drivers/gpu/drm/msm/msm_drv.h
> @@ -74,16 +74,6 @@ enum msm_dsi_controller {
> #define MSM_GPU_MAX_RINGS 4
> #define MAX_H_TILES_PER_DISPLAY 2
>
> -/**
> - * enum msm_event_wait - type of HW events to wait for
> - * @MSM_ENC_COMMIT_DONE - wait for the driver to flush the registers to HW
> - * @MSM_ENC_TX_COMPLETE - wait for the HW to transfer the frame to panel
> - */
> -enum msm_event_wait {
> - MSM_ENC_COMMIT_DONE = 0,
> - MSM_ENC_TX_COMPLETE,
> -};
> -
> /**
> * struct msm_display_topology - defines a display topology pipeline
> * @num_lm: number of layer mixers used
>

2024-02-25 19:57:45

by Abhinav Kumar

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] drm/msm/dpu: capture snapshot on the first commit_done timeout



On 2/25/2024 6:12 AM, Dmitry Baryshkov wrote:
> In order to debug commit_done timeouts, capture the devcoredump state
> when the first timeout occurs after the encoder has been enabled.
>
> Signed-off-by: Dmitry Baryshkov <[email protected]>
> ---
> drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 9 +++++++++
> 1 file changed, 9 insertions(+)
>

This looks fine now. Once we discuss patch 2, I can ack this.

> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
> index 30f349c8a1e5..3cae07bf0b9b 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
> @@ -126,6 +126,8 @@ enum dpu_enc_rc_states {
> * @base: drm_encoder base class for registration with DRM
> * @enc_spinlock: Virtual-Encoder-Wide Spin Lock for IRQ purposes
> * @enabled: True if the encoder is active, protected by enc_lock
> + * @commit_done_timedout: True if there has been a timeout on commit after
> + * enabling the encoder.
> * @num_phys_encs: Actual number of physical encoders contained.
> * @phys_encs: Container of physical encoders managed.
> * @cur_master: Pointer to the current master in this mode. Optimization
> @@ -172,6 +174,7 @@ struct dpu_encoder_virt {
> spinlock_t enc_spinlock;
>
> bool enabled;
> + bool commit_done_timedout;
>
> unsigned int num_phys_encs;
> struct dpu_encoder_phys *phys_encs[MAX_PHYS_ENCODERS_PER_VIRTUAL];
> @@ -1226,6 +1229,8 @@ static void dpu_encoder_virt_atomic_enable(struct drm_encoder *drm_enc,
> else if (disp_info->intf_type == INTF_DSI)
> dpu_enc->wide_bus_en = msm_dsi_wide_bus_enabled(priv->dsi[index]);
>
> + dpu_enc->commit_done_timedout = false;
> +
> mutex_lock(&dpu_enc->enc_lock);
> cur_mode = &dpu_enc->base.crtc->state->adjusted_mode;
>
> @@ -2436,6 +2441,10 @@ int dpu_encoder_wait_for_commit_done(struct drm_encoder *drm_enc)
> DPU_ATRACE_BEGIN("wait_for_commit_done");
> ret = phys->ops.wait_for_commit_done(phys);
> DPU_ATRACE_END("wait_for_commit_done");
> + if (ret == -ETIMEDOUT && !dpu_enc->commit_done_timedout) {
> + dpu_enc->commit_done_timedout = true;
> + msm_disp_snapshot_state(drm_enc->dev);
> + }
> if (ret)
> return ret;
> }
>

2024-02-25 20:53:14

by Dmitry Baryshkov

[permalink] [raw]
Subject: Re: [PATCH v3 2/3] drm/msm/dpu: split dpu_encoder_wait_for_event into two functions

On Sun, 25 Feb 2024 at 21:49, Abhinav Kumar <[email protected]> wrote:
>
>
>
> On 2/25/2024 6:12 AM, Dmitry Baryshkov wrote:
> > Stop multiplexing several events via the dpu_encoder_wait_for_event()
> > function. Split it into two distinct functions two allow separate
> > handling of those events.
> >
>
> I understand the idea but would handling of those events be really distinct?

We are interested in capturing the state after the first
wait_for_commit_done() timeout. The wait_for_tx_complete doesn't need
such handling. Even if we were to handle it in some way, it would be a
different conditional.

Last but not least, I don't like multiplexing just for the sake of it.
There is nearly no common behaviour.

>
> Like if wait_for_commit_done timedout or wait_for_tx_complete timedout,
> the handling will be similar from my PoV.
>
> > Signed-off-by: Dmitry Baryshkov <[email protected]>
> > ---
> > drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 74 +++++++++++++++++++++--------
> > drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h | 22 ++-------
> > drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 2 +-
> > drivers/gpu/drm/msm/msm_drv.h | 10 ----
> > 4 files changed, 59 insertions(+), 49 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
> > index 194dbb08331d..30f349c8a1e5 100644
> > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
> > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
> > @@ -1282,7 +1282,7 @@ static void dpu_encoder_virt_atomic_disable(struct drm_encoder *drm_enc,
> > trace_dpu_enc_disable(DRMID(drm_enc));
> >
> > /* wait for idle */
> > - dpu_encoder_wait_for_event(drm_enc, MSM_ENC_TX_COMPLETE);
> > + dpu_encoder_wait_for_tx_complete(drm_enc);
> >
> > dpu_encoder_resource_control(drm_enc, DPU_ENC_RC_EVENT_PRE_STOP);
> >
> > @@ -2402,10 +2402,23 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
> > return &dpu_enc->base;
> > }
> >
> > -int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
> > - enum msm_event_wait event)
> > +/**
> > + * dpu_encoder_wait_for_commit_done() - Wait for encoder to flush pending state
> > + * @drm_enc: encoder pointer
> > + *
> > + * Wait for hardware to have flushed the current pending frames to hardware at
> > + * a vblank or ctl_start Encoders will map this differently depending on the
> > + * panel type.
> > + *
>
> Missing a '.' between ctl_start and Encoder?

Ack. Also I should drop the leftovers afterwards.

>
> > + * MSM_ENC_TX_COMPLETE - Wait for the hardware to transfer all the pixels to
> > + * the panel. Encoders will map this differently
> > + * depending on the panel type.
> > + * vid mode -> vsync_irq
> > + * cmd mode -> pp_done
> > + * Return: 0 on success, -EWOULDBLOCK if already signaled, error otherwise
> > + */
> > +int dpu_encoder_wait_for_commit_done(struct drm_encoder *drm_enc)
> > {
> > - int (*fn_wait)(struct dpu_encoder_phys *phys_enc) = NULL;
> > struct dpu_encoder_virt *dpu_enc = NULL;
> > int i, ret = 0;
> >
> > @@ -2419,23 +2432,46 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
> > for (i = 0; i < dpu_enc->num_phys_encs; i++) {
> > struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
> >
> > - switch (event) {
> > - case MSM_ENC_COMMIT_DONE:
> > - fn_wait = phys->ops.wait_for_commit_done;
> > - break;
> > - case MSM_ENC_TX_COMPLETE:
> > - fn_wait = phys->ops.wait_for_tx_complete;
> > - break;
> > - default:
> > - DPU_ERROR_ENC(dpu_enc, "unknown wait event %d\n",
> > - event);
> > - return -EINVAL;
> > + if (phys->ops.wait_for_commit_done) {
> > + DPU_ATRACE_BEGIN("wait_for_commit_done");
> > + ret = phys->ops.wait_for_commit_done(phys);
> > + DPU_ATRACE_END("wait_for_commit_done");
> > + if (ret)
> > + return ret;
> > }
> > + }
> > +
> > + return ret;
> > +}
> > +
> > +/**
> > + * dpu_encoder_wait_for_tx_complete() - Wait for encoder to transfer pixels to panel
> > + * @drm_enc: encoder pointer
> > + *
> > + * Wait for the hardware to transfer all the pixels to the panel. Encoders will
> > + * map this differently depending on the panel type.
> > + *
> > + * Return: 0 on success, -EWOULDBLOCK if already signaled, error otherwise
> > + */
> > +int dpu_encoder_wait_for_tx_complete(struct drm_encoder *drm_enc)
> > +{
> > + struct dpu_encoder_virt *dpu_enc = NULL;
> > + int i, ret = 0;
> > +
> > + if (!drm_enc) {
> > + DPU_ERROR("invalid encoder\n");
> > + return -EINVAL;
> > + }
> > + dpu_enc = to_dpu_encoder_virt(drm_enc);
> > + DPU_DEBUG_ENC(dpu_enc, "\n");
> > +
> > + for (i = 0; i < dpu_enc->num_phys_encs; i++) {
> > + struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
> >
> > - if (fn_wait) {
> > - DPU_ATRACE_BEGIN("wait_for_completion_event");
> > - ret = fn_wait(phys);
> > - DPU_ATRACE_END("wait_for_completion_event");
> > + if (phys->ops.wait_for_tx_complete) {
> > + DPU_ATRACE_BEGIN("wait_for_tx_complete");
> > + ret = phys->ops.wait_for_tx_complete(phys);
> > + DPU_ATRACE_END("wait_for_tx_complete");
> > if (ret)
> > return ret;
> > }
> > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
> > index fe6b1d312a74..0c928d1876e4 100644
> > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
> > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
> > @@ -93,25 +93,9 @@ void dpu_encoder_kickoff(struct drm_encoder *encoder);
> > */
> > int dpu_encoder_vsync_time(struct drm_encoder *drm_enc, ktime_t *wakeup_time);
> >
> > -/**
> > - * dpu_encoder_wait_for_event - Waits for encoder events
> > - * @encoder: encoder pointer
> > - * @event: event to wait for
> > - * MSM_ENC_COMMIT_DONE - Wait for hardware to have flushed the current pending
> > - * frames to hardware at a vblank or ctl_start
> > - * Encoders will map this differently depending on the
> > - * panel type.
> > - * vid mode -> vsync_irq
> > - * cmd mode -> ctl_start
> > - * MSM_ENC_TX_COMPLETE - Wait for the hardware to transfer all the pixels to
> > - * the panel. Encoders will map this differently
> > - * depending on the panel type.
> > - * vid mode -> vsync_irq
> > - * cmd mode -> pp_done
> > - * Returns: 0 on success, -EWOULDBLOCK if already signaled, error otherwise
> > - */
> > -int dpu_encoder_wait_for_event(struct drm_encoder *drm_encoder,
> > - enum msm_event_wait event);
> > +int dpu_encoder_wait_for_commit_done(struct drm_encoder *drm_encoder);
> > +
> > +int dpu_encoder_wait_for_tx_complete(struct drm_encoder *drm_encoder);
> >
> > /*
> > * dpu_encoder_get_intf_mode - get interface mode of the given encoder
> > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
> > index d6412395bacc..26b5e54031d9 100644
> > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
> > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
> > @@ -476,7 +476,7 @@ static void dpu_kms_wait_for_commit_done(struct msm_kms *kms,
> > * mode panels. This may be a no-op for command mode panels.
> > */
> > trace_dpu_kms_wait_for_commit_done(DRMID(crtc));
> > - ret = dpu_encoder_wait_for_event(encoder, MSM_ENC_COMMIT_DONE);
> > + ret = dpu_encoder_wait_for_commit_done(encoder);
> > if (ret && ret != -EWOULDBLOCK) {
> > DPU_ERROR("wait for commit done returned %d\n", ret);
> > break;
> > diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
> > index 762e13e2df74..91cf57f72321 100644
> > --- a/drivers/gpu/drm/msm/msm_drv.h
> > +++ b/drivers/gpu/drm/msm/msm_drv.h
> > @@ -74,16 +74,6 @@ enum msm_dsi_controller {
> > #define MSM_GPU_MAX_RINGS 4
> > #define MAX_H_TILES_PER_DISPLAY 2
> >
> > -/**
> > - * enum msm_event_wait - type of HW events to wait for
> > - * @MSM_ENC_COMMIT_DONE - wait for the driver to flush the registers to HW
> > - * @MSM_ENC_TX_COMPLETE - wait for the HW to transfer the frame to panel
> > - */
> > -enum msm_event_wait {
> > - MSM_ENC_COMMIT_DONE = 0,
> > - MSM_ENC_TX_COMPLETE,
> > -};
> > -
> > /**
> > * struct msm_display_topology - defines a display topology pipeline
> > * @num_lm: number of layer mixers used
> >



--
With best wishes
Dmitry

2024-02-25 20:58:24

by Dmitry Baryshkov

[permalink] [raw]
Subject: Re: [PATCH v3 1/3] drm/msm/dpu: make "vblank timeout" more useful

On Sun, 25 Feb 2024 at 21:44, Abhinav Kumar <[email protected]> wrote:
>
>
>
> On 2/25/2024 6:12 AM, Dmitry Baryshkov wrote:
> > We have several reports of vblank timeout messages. However after some
> > debugging it was found that there might be different causes to that.
> > To allow us to identify the DPU block that gets stuck, include the
> > actual CTL_FLUSH value into the timeout message.
> >
>
> the flush register shall also be part of the coredump in patch 3. so why
> is this needed?

I'd prefer to keep it. The devcoredump captures all registers, while
CTL_FLUSH points to the actual bit without the need to analyze the
coredump. At the very least, it allows us to analyze whether the issue
is the same or not (compare SSPP_DMA2 on c630 vs LM_1 on sdm660)
without looking into the dump.

>
> > Signed-off-by: Dmitry Baryshkov <[email protected]>
> > ---
> > drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
> > index 2aa72b578764..6058706f03e4 100644
> > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
> > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
> > @@ -480,7 +480,7 @@ static int dpu_encoder_phys_vid_wait_for_commit_done(
> > (hw_ctl->ops.get_flush_register(hw_ctl) == 0),
> > msecs_to_jiffies(50));
> > if (ret <= 0) {
> > - DPU_ERROR("vblank timeout\n");
> > + DPU_ERROR("vblank timeout: %x\n", hw_ctl->ops.get_flush_register(hw_ctl));
> > return -ETIMEDOUT;
> > }
> >
> >



--
With best wishes
Dmitry

2024-02-25 21:04:04

by Abhinav Kumar

[permalink] [raw]
Subject: Re: [PATCH v3 1/3] drm/msm/dpu: make "vblank timeout" more useful



On 2/25/2024 12:57 PM, Dmitry Baryshkov wrote:
> On Sun, 25 Feb 2024 at 21:44, Abhinav Kumar <[email protected]> wrote:
>>
>>
>>
>> On 2/25/2024 6:12 AM, Dmitry Baryshkov wrote:
>>> We have several reports of vblank timeout messages. However after some
>>> debugging it was found that there might be different causes to that.
>>> To allow us to identify the DPU block that gets stuck, include the
>>> actual CTL_FLUSH value into the timeout message.
>>>
>>
>> the flush register shall also be part of the coredump in patch 3. so why
>> is this needed?
>
> I'd prefer to keep it. The devcoredump captures all registers, while
> CTL_FLUSH points to the actual bit without the need to analyze the
> coredump. At the very least, it allows us to analyze whether the issue
> is the same or not (compare SSPP_DMA2 on c630 vs LM_1 on sdm660)
> without looking into the dump.
>

Ok, sg


Reviewed-by: Abhinav Kumar <[email protected]>

>>
>>> Signed-off-by: Dmitry Baryshkov <[email protected]>
>>> ---
>>> drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 2 +-
>>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
>>> index 2aa72b578764..6058706f03e4 100644
>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
>>> @@ -480,7 +480,7 @@ static int dpu_encoder_phys_vid_wait_for_commit_done(
>>> (hw_ctl->ops.get_flush_register(hw_ctl) == 0),
>>> msecs_to_jiffies(50));
>>> if (ret <= 0) {
>>> - DPU_ERROR("vblank timeout\n");
>>> + DPU_ERROR("vblank timeout: %x\n", hw_ctl->ops.get_flush_register(hw_ctl));
>>> return -ETIMEDOUT;
>>> }
>>>
>>>
>
>
>

2024-02-26 01:34:04

by Abhinav Kumar

[permalink] [raw]
Subject: Re: [PATCH v3 2/3] drm/msm/dpu: split dpu_encoder_wait_for_event into two functions



On 2/25/2024 12:52 PM, Dmitry Baryshkov wrote:
> On Sun, 25 Feb 2024 at 21:49, Abhinav Kumar <[email protected]> wrote:
>>
>>
>>
>> On 2/25/2024 6:12 AM, Dmitry Baryshkov wrote:
>>> Stop multiplexing several events via the dpu_encoder_wait_for_event()
>>> function. Split it into two distinct functions two allow separate
>>> handling of those events.
>>>
>>
>> I understand the idea but would handling of those events be really distinct?
>
> We are interested in capturing the state after the first
> wait_for_commit_done() timeout. The wait_for_tx_complete doesn't need
> such handling. Even if we were to handle it in some way, it would be a
> different conditional.
>

wait_for_tx_complete timeout also needs similar handling.

the timeout mechanisms need to be unified at some point, that time I
will re-visit the need to have a common wait_timeout handler.

Lets see how this code evolves.

So with that nit about the kernel doc addressed,

Reviewed-by: Abhinav Kumar <[email protected]>

> Last but not least, I don't like multiplexing just for the sake of it.
> There is nearly no common behaviour.
>

the multiplexing allows us to have one common timeout path which I think
will eventually happen. So i dont think its true that there is no common
behavior.

>>
>> Like if wait_for_commit_done timedout or wait_for_tx_complete timedout,
>> the handling will be similar from my PoV.
>>
>>> Signed-off-by: Dmitry Baryshkov <[email protected]>
>>> ---
>>> drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 74 +++++++++++++++++++++--------
>>> drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h | 22 ++-------
>>> drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 2 +-
>>> drivers/gpu/drm/msm/msm_drv.h | 10 ----
>>> 4 files changed, 59 insertions(+), 49 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
>>> index 194dbb08331d..30f349c8a1e5 100644
>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
>>> @@ -1282,7 +1282,7 @@ static void dpu_encoder_virt_atomic_disable(struct drm_encoder *drm_enc,
>>> trace_dpu_enc_disable(DRMID(drm_enc));
>>>
>>> /* wait for idle */
>>> - dpu_encoder_wait_for_event(drm_enc, MSM_ENC_TX_COMPLETE);
>>> + dpu_encoder_wait_for_tx_complete(drm_enc);
>>>
>>> dpu_encoder_resource_control(drm_enc, DPU_ENC_RC_EVENT_PRE_STOP);
>>>
>>> @@ -2402,10 +2402,23 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
>>> return &dpu_enc->base;
>>> }
>>>
>>> -int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
>>> - enum msm_event_wait event)
>>> +/**
>>> + * dpu_encoder_wait_for_commit_done() - Wait for encoder to flush pending state
>>> + * @drm_enc: encoder pointer
>>> + *
>>> + * Wait for hardware to have flushed the current pending frames to hardware at
>>> + * a vblank or ctl_start Encoders will map this differently depending on the
>>> + * panel type.
>>> + *
>>
>> Missing a '.' between ctl_start and Encoder?
>
> Ack. Also I should drop the leftovers afterwards.
>
>>
>>> + * MSM_ENC_TX_COMPLETE - Wait for the hardware to transfer all the pixels to
>>> + * the panel. Encoders will map this differently
>>> + * depending on the panel type.
>>> + * vid mode -> vsync_irq
>>> + * cmd mode -> pp_done
>>> + * Return: 0 on success, -EWOULDBLOCK if already signaled, error otherwise
>>> + */
>>> +int dpu_encoder_wait_for_commit_done(struct drm_encoder *drm_enc)
>>> {
>>> - int (*fn_wait)(struct dpu_encoder_phys *phys_enc) = NULL;
>>> struct dpu_encoder_virt *dpu_enc = NULL;
>>> int i, ret = 0;
>>>
>>> @@ -2419,23 +2432,46 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
>>> for (i = 0; i < dpu_enc->num_phys_encs; i++) {
>>> struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
>>>
>>> - switch (event) {
>>> - case MSM_ENC_COMMIT_DONE:
>>> - fn_wait = phys->ops.wait_for_commit_done;
>>> - break;
>>> - case MSM_ENC_TX_COMPLETE:
>>> - fn_wait = phys->ops.wait_for_tx_complete;
>>> - break;
>>> - default:
>>> - DPU_ERROR_ENC(dpu_enc, "unknown wait event %d\n",
>>> - event);
>>> - return -EINVAL;
>>> + if (phys->ops.wait_for_commit_done) {
>>> + DPU_ATRACE_BEGIN("wait_for_commit_done");
>>> + ret = phys->ops.wait_for_commit_done(phys);
>>> + DPU_ATRACE_END("wait_for_commit_done");
>>> + if (ret)
>>> + return ret;
>>> }
>>> + }
>>> +
>>> + return ret;
>>> +}
>>> +
>>> +/**
>>> + * dpu_encoder_wait_for_tx_complete() - Wait for encoder to transfer pixels to panel
>>> + * @drm_enc: encoder pointer
>>> + *
>>> + * Wait for the hardware to transfer all the pixels to the panel. Encoders will
>>> + * map this differently depending on the panel type.
>>> + *
>>> + * Return: 0 on success, -EWOULDBLOCK if already signaled, error otherwise
>>> + */
>>> +int dpu_encoder_wait_for_tx_complete(struct drm_encoder *drm_enc)
>>> +{
>>> + struct dpu_encoder_virt *dpu_enc = NULL;
>>> + int i, ret = 0;
>>> +
>>> + if (!drm_enc) {
>>> + DPU_ERROR("invalid encoder\n");
>>> + return -EINVAL;
>>> + }
>>> + dpu_enc = to_dpu_encoder_virt(drm_enc);
>>> + DPU_DEBUG_ENC(dpu_enc, "\n");
>>> +
>>> + for (i = 0; i < dpu_enc->num_phys_encs; i++) {
>>> + struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
>>>
>>> - if (fn_wait) {
>>> - DPU_ATRACE_BEGIN("wait_for_completion_event");
>>> - ret = fn_wait(phys);
>>> - DPU_ATRACE_END("wait_for_completion_event");
>>> + if (phys->ops.wait_for_tx_complete) {
>>> + DPU_ATRACE_BEGIN("wait_for_tx_complete");
>>> + ret = phys->ops.wait_for_tx_complete(phys);
>>> + DPU_ATRACE_END("wait_for_tx_complete");
>>> if (ret)
>>> return ret;
>>> }
>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
>>> index fe6b1d312a74..0c928d1876e4 100644
>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
>>> @@ -93,25 +93,9 @@ void dpu_encoder_kickoff(struct drm_encoder *encoder);
>>> */
>>> int dpu_encoder_vsync_time(struct drm_encoder *drm_enc, ktime_t *wakeup_time);
>>>
>>> -/**
>>> - * dpu_encoder_wait_for_event - Waits for encoder events
>>> - * @encoder: encoder pointer
>>> - * @event: event to wait for
>>> - * MSM_ENC_COMMIT_DONE - Wait for hardware to have flushed the current pending
>>> - * frames to hardware at a vblank or ctl_start
>>> - * Encoders will map this differently depending on the
>>> - * panel type.
>>> - * vid mode -> vsync_irq
>>> - * cmd mode -> ctl_start
>>> - * MSM_ENC_TX_COMPLETE - Wait for the hardware to transfer all the pixels to
>>> - * the panel. Encoders will map this differently
>>> - * depending on the panel type.
>>> - * vid mode -> vsync_irq
>>> - * cmd mode -> pp_done
>>> - * Returns: 0 on success, -EWOULDBLOCK if already signaled, error otherwise
>>> - */
>>> -int dpu_encoder_wait_for_event(struct drm_encoder *drm_encoder,
>>> - enum msm_event_wait event);
>>> +int dpu_encoder_wait_for_commit_done(struct drm_encoder *drm_encoder);
>>> +
>>> +int dpu_encoder_wait_for_tx_complete(struct drm_encoder *drm_encoder);
>>>
>>> /*
>>> * dpu_encoder_get_intf_mode - get interface mode of the given encoder
>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
>>> index d6412395bacc..26b5e54031d9 100644
>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
>>> @@ -476,7 +476,7 @@ static void dpu_kms_wait_for_commit_done(struct msm_kms *kms,
>>> * mode panels. This may be a no-op for command mode panels.
>>> */
>>> trace_dpu_kms_wait_for_commit_done(DRMID(crtc));
>>> - ret = dpu_encoder_wait_for_event(encoder, MSM_ENC_COMMIT_DONE);
>>> + ret = dpu_encoder_wait_for_commit_done(encoder);
>>> if (ret && ret != -EWOULDBLOCK) {
>>> DPU_ERROR("wait for commit done returned %d\n", ret);
>>> break;
>>> diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
>>> index 762e13e2df74..91cf57f72321 100644
>>> --- a/drivers/gpu/drm/msm/msm_drv.h
>>> +++ b/drivers/gpu/drm/msm/msm_drv.h
>>> @@ -74,16 +74,6 @@ enum msm_dsi_controller {
>>> #define MSM_GPU_MAX_RINGS 4
>>> #define MAX_H_TILES_PER_DISPLAY 2
>>>
>>> -/**
>>> - * enum msm_event_wait - type of HW events to wait for
>>> - * @MSM_ENC_COMMIT_DONE - wait for the driver to flush the registers to HW
>>> - * @MSM_ENC_TX_COMPLETE - wait for the HW to transfer the frame to panel
>>> - */
>>> -enum msm_event_wait {
>>> - MSM_ENC_COMMIT_DONE = 0,
>>> - MSM_ENC_TX_COMPLETE,
>>> -};
>>> -
>>> /**
>>> * struct msm_display_topology - defines a display topology pipeline
>>> * @num_lm: number of layer mixers used
>>>
>
>
>

2024-02-26 01:36:15

by Abhinav Kumar

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] drm/msm/dpu: capture snapshot on the first commit_done timeout



On 2/25/2024 11:57 AM, Abhinav Kumar wrote:
>
>
> On 2/25/2024 6:12 AM, Dmitry Baryshkov wrote:
>> In order to debug commit_done timeouts, capture the devcoredump state
>> when the first timeout occurs after the encoder has been enabled.
>>
>> Signed-off-by: Dmitry Baryshkov <[email protected]>
>> ---
>>   drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 9 +++++++++
>>   1 file changed, 9 insertions(+)
>>
>
> This looks fine now. Once we discuss patch 2, I can ack this.
>

Not entirely onboard with patch 2, but lets see how that code evolves

Reviewed-by: Abhinav Kumar <[email protected]>

>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
>> b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
>> index 30f349c8a1e5..3cae07bf0b9b 100644
>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
>> @@ -126,6 +126,8 @@ enum dpu_enc_rc_states {
>>    * @base:        drm_encoder base class for registration with DRM
>>    * @enc_spinlock:    Virtual-Encoder-Wide Spin Lock for IRQ purposes
>>    * @enabled:        True if the encoder is active, protected by
>> enc_lock
>> + * @commit_done_timedout: True if there has been a timeout on commit
>> after
>> + *            enabling the encoder.
>>    * @num_phys_encs:    Actual number of physical encoders contained.
>>    * @phys_encs:        Container of physical encoders managed.
>>    * @cur_master:        Pointer to the current master in this mode.
>> Optimization
>> @@ -172,6 +174,7 @@ struct dpu_encoder_virt {
>>       spinlock_t enc_spinlock;
>>       bool enabled;
>> +    bool commit_done_timedout;
>>       unsigned int num_phys_encs;
>>       struct dpu_encoder_phys *phys_encs[MAX_PHYS_ENCODERS_PER_VIRTUAL];
>> @@ -1226,6 +1229,8 @@ static void
>> dpu_encoder_virt_atomic_enable(struct drm_encoder *drm_enc,
>>       else if (disp_info->intf_type == INTF_DSI)
>>           dpu_enc->wide_bus_en =
>> msm_dsi_wide_bus_enabled(priv->dsi[index]);
>> +    dpu_enc->commit_done_timedout = false;
>> +
>>       mutex_lock(&dpu_enc->enc_lock);
>>       cur_mode = &dpu_enc->base.crtc->state->adjusted_mode;
>> @@ -2436,6 +2441,10 @@ int dpu_encoder_wait_for_commit_done(struct
>> drm_encoder *drm_enc)
>>               DPU_ATRACE_BEGIN("wait_for_commit_done");
>>               ret = phys->ops.wait_for_commit_done(phys);
>>               DPU_ATRACE_END("wait_for_commit_done");
>> +            if (ret == -ETIMEDOUT && !dpu_enc->commit_done_timedout) {
>> +                dpu_enc->commit_done_timedout = true;
>> +                msm_disp_snapshot_state(drm_enc->dev);
>> +            }
>>               if (ret)
>>                   return ret;
>>           }
>>