2021-04-28 20:56:19

by Rob Clark

Subject: [PATCH 0/2] drm/msm: Smooth out ringbuffer-full handling

From: Rob Clark <[email protected]>

With some recent userspace work to allow more rendering to be merged
into a single SUBMIT ioctl, I realized we have some sharp edges around
running out of free ringbuffer space.

1) Currently we only flush once all the cmds (or rather IBs to the cmd
   buffer) are written into the ringbuffer, which restricts the entire
   submit to fitting in the rb. That works out to slightly less than
   2k cmds per submit, after accounting for the other packets needed.
2) Currently, for devices that use an RPTR shadow, we only write the
   CP_WHERE_AM_I packet at the end of the submit, so we don't see the
   partial progress the GPU makes while chewing through previous large
   submits (see the sketch after this list).
3) We spin for up to 1sec waiting for rb space, and then give up and
   proceed to overwrite the packets that the CP is currently chewing
   on.. which goes badly. If userspace submits more than 1sec of work
   per submit ioctl, this means we spin for a long time and then
   corrupt the rb anyway.
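
To make #2 concrete: the kernel computes free ringbuffer space against
a CPU-visible shadow of the GPU's read pointer, so a shadow that is
only updated at the end of a large submit makes the ring look full
long after the GPU has actually consumed most of it. A minimal sketch
of that calculation (illustrative only; rb_free_dwords() is a made-up
name, not the driver's actual helper):

static uint32_t rb_free_dwords(uint32_t wptr, uint32_t shadow_rptr,
                               uint32_t size_dwords)
{
        /*
         * Standard ring math: space between wptr and rptr, modulo the
         * (power-of-two) ring size, minus one dword so that wptr can
         * never catch up to rptr exactly.  If shadow_rptr is stale,
         * this underestimates the real free space, possibly all the
         * way down to zero.
         */
        return (shadow_rptr - wptr - 1) & (size_dwords - 1);
}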

This patchset doesn't completely address #1, and in general we don't
want to block uninterruptibly for so long.. but that will require some
more extensive changes.

What it does do is address #2 by periodically emitting a CP_WHERE_AM_I,
and #3 by adding detection and error handling for rb overflow, returning
-ENOSPC when that happens.
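
The rb overflow handling amounts to roughly the following (a
simplified sketch; wait_for_rb_space() is a made-up placeholder, but
ring->overflow is the flag the series adds, checked by the flush paths
in the diffs below):

static int reserve_rb_space(struct msm_ringbuffer *ring, uint32_t ndwords)
{
        /*
         * If we time out waiting for enough free dwords (hypothetical
         * helper), mark the ring as overflowed so that the flush
         * becomes a no-op, and fail the submit with -ENOSPC instead
         * of overwriting packets the CP may still be reading.
         */
        if (!wait_for_rb_space(ring, ndwords)) {
                ring->overflow = true;
                return -ENOSPC;
        }

        return 0;
}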

Rob Clark (2):
drm/msm: Handle ringbuffer overflow
drm/msm: Periodically update RPTR shadow

drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 32 ++++++++++++++++++++----
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 30 +++++++++++++++++-----
drivers/gpu/drm/msm/adreno/adreno_gpu.c | 24 +++++++++++++++++-
drivers/gpu/drm/msm/msm_gem_submit.c | 7 +++++-
drivers/gpu/drm/msm/msm_gpu.c | 33 +++++++++++++++++++++++--
drivers/gpu/drm/msm/msm_gpu.h | 2 +-
drivers/gpu/drm/msm/msm_ringbuffer.h | 5 ++++
7 files changed, 117 insertions(+), 16 deletions(-)

--
2.30.2


2021-04-28 21:08:57

by Rob Clark

Subject: [PATCH 2/2] drm/msm: Periodically update RPTR shadow

From: Rob Clark <[email protected]>

On a5xx and a6xx devices that use CP_WHERE_AM_I to update a
ringbuffer read-ptr shadow value, emit a CP_WHERE_AM_I packet every
32 commands, so that a later submit waiting for ringbuffer space to
become available sees partial progress, rather than seeing no rptr
advance at all until the GPU reaches the end of the submit it is
currently chewing on.

Signed-off-by: Rob Clark <[email protected]>
---
drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 29 ++++++++++++++++++++++-----
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 27 +++++++++++++++++++------
2 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 0c8faad3b328..5202f1498a48 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -18,6 +18,18 @@ static void a5xx_dump(struct msm_gpu *gpu);

#define GPU_PAS_ID 13

+static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+
+ if (a5xx_gpu->has_whereami) {
+ OUT_PKT7(ring, CP_WHERE_AM_I, 2);
+ OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
+ OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
+ }
+}
+
void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
bool sync)
{
@@ -30,11 +42,8 @@ void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
* Most flush operations need to issue a WHERE_AM_I opcode to sync up
* the rptr shadow
*/
- if (a5xx_gpu->has_whereami && sync) {
- OUT_PKT7(ring, CP_WHERE_AM_I, 2);
- OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
- OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
- }
+ if (sync)
+ update_shadow_rptr(gpu, ring);

if (unlikely(ring->overflow))
return;
@@ -171,6 +180,16 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
ibs++;
break;
}
+
+ /*
+ * Periodically update shadow-wptr if needed, so that we
+ * can see partial progress of submits with large # of
+ * cmds.. otherwise we could needlessly stall waiting for
+ * ringbuffer state, simply due to looking at a shadow
+ * rptr value that has not been updated
+ */
+ if ((ibs % 32) == 0)
+ update_shadow_rptr(gpu, ring);
}

/*
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 4a4728a774c0..2986e36ffd8d 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -52,21 +52,25 @@ static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
return true;
}

-static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
- uint32_t wptr;
- unsigned long flags;

/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
- struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
-
OUT_PKT7(ring, CP_WHERE_AM_I, 2);
OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
}
+}
+
+static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+ uint32_t wptr;
+ unsigned long flags;
+
+ update_shadow_rptr(gpu, ring);

if (unlikely(ring->overflow))
return;
@@ -148,7 +152,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct msm_ringbuffer *ring = submit->ring;
- unsigned int i;
+ unsigned int i, ibs = 0;

a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx);

@@ -184,8 +188,19 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
OUT_RING(ring, submit->cmd[i].size);
+ ibs++;
break;
}
+
+ /*
+ * Periodically update shadow-wptr if needed, so that we
+ * can see partial progress of submits with large # of
+ * cmds.. otherwise we could needlessly stall waiting for
+ * ringbuffer state, simply due to looking at a shadow
+ * rptr value that has not been updated
+ */
+ if ((ibs % 32) == 0)
+ update_shadow_rptr(gpu, ring);
}

get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
--
2.30.2

2021-05-02 19:51:20

by Jordan Crouse

Subject: Re: [PATCH 2/2] drm/msm: Periodically update RPTR shadow

On Wed, Apr 28, 2021 at 12:36:49PM -0700, Rob Clark wrote:
> From: Rob Clark <[email protected]>
>
> On a5xx and a6xx devices that use CP_WHERE_AM_I to update a
> ringbuffer read-ptr shadow value, emit a CP_WHERE_AM_I packet every
> 32 commands, so that a later submit waiting for ringbuffer space to
> become available sees partial progress, rather than seeing no rptr
> advance at all until the GPU reaches the end of the submit it is
> currently chewing on.

Acked-by: Jordan Crouse <[email protected]>
