2021-01-28 00:48:41

by Eric Anholt

[permalink] [raw]
Subject: [PATCH 1/3] drm/msm: Fix race of GPU init vs timestamp power management.

We were using the same force-poweron bit in the two codepaths, so they
could race to have one of them lose GPU power early.

Signed-off-by: Eric Anholt <[email protected]>
Cc: [email protected] # v5.9
---
drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 25 ++++++++++++++++++++++---
drivers/gpu/drm/msm/adreno/a6xx_gmu.h | 8 ++++++++
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 ++--
3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 78836b4fb98e..378dc7f190c3 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -264,6 +264,16 @@ int _a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state, char
}
name = "GPU_SET";
break;
+ case GMU_OOB_PERFCOUNTER_SET:
+ if (gmu->legacy) {
+ request = GMU_OOB_PERFCOUNTER_REQUEST;
+ ack = GMU_OOB_PERFCOUNTER_ACK;
+ } else {
+ request = GMU_OOB_PERFCOUNTER_REQUEST_NEW;
+ ack = GMU_OOB_PERFCOUNTER_ACK_NEW;
+ }
+ name = "PERFCOUNTER";
+ break;
case GMU_OOB_BOOT_SLUMBER:
request = GMU_OOB_BOOT_SLUMBER_REQUEST;
ack = GMU_OOB_BOOT_SLUMBER_ACK;
@@ -302,9 +312,14 @@ int _a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state, char
void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
{
if (!gmu->legacy) {
- WARN_ON(state != GMU_OOB_GPU_SET);
- gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
- 1 << GMU_OOB_GPU_SET_CLEAR_NEW);
+ if (state == GMU_OOB_GPU_SET) {
+ gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
+ 1 << GMU_OOB_GPU_SET_CLEAR_NEW);
+ } else {
+ WARN_ON(state != GMU_OOB_PERFCOUNTER_SET);
+ gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
+ 1 << GMU_OOB_PERFCOUNTER_CLEAR_NEW);
+ }
return;
}

@@ -313,6 +328,10 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
1 << GMU_OOB_GPU_SET_CLEAR);
break;
+ case GMU_OOB_PERFCOUNTER_SET:
+ gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
+ 1 << GMU_OOB_PERFCOUNTER_CLEAR);
+ break;
case GMU_OOB_BOOT_SLUMBER:
gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
1 << GMU_OOB_BOOT_SLUMBER_CLEAR);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index c6d2bced8e5d..9fa278de2106 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -156,6 +156,7 @@ enum a6xx_gmu_oob_state {
GMU_OOB_BOOT_SLUMBER = 0,
GMU_OOB_GPU_SET,
GMU_OOB_DCVS_SET,
+ GMU_OOB_PERFCOUNTER_SET,
};

/* These are the interrupt / ack bits for each OOB request that are set
@@ -190,6 +191,13 @@ enum a6xx_gmu_oob_state {
#define GMU_OOB_GPU_SET_ACK_NEW 31
#define GMU_OOB_GPU_SET_CLEAR_NEW 31

+#define GMU_OOB_PERFCOUNTER_REQUEST 17
+#define GMU_OOB_PERFCOUNTER_ACK 25
+#define GMU_OOB_PERFCOUNTER_CLEAR 25
+
+#define GMU_OOB_PERFCOUNTER_REQUEST_NEW 28
+#define GMU_OOB_PERFCOUNTER_ACK_NEW 30
+#define GMU_OOB_PERFCOUNTER_CLEAR_NEW 30

void a6xx_hfi_init(struct a6xx_gmu *gmu);
int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index c8a9010c1a1d..7424a70b9d35 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1177,12 +1177,12 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

/* Force the GPU power on so we can read this register */
- a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
+ a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);

*value = gpu_read64(gpu, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
REG_A6XX_RBBM_PERFCTR_CP_0_HI);

- a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
+ a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
return 0;
}

--
2.30.0


2021-01-28 00:50:04

by Eric Anholt

[permalink] [raw]
Subject: [PATCH 3/3] drm/msm: Clean up GMU OOB set/clear handling.

Now that the bug is fixed in the minimal way for stable, go make the
code table-driven.

Signed-off-by: Eric Anholt <[email protected]>
---
drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 124 +++++++++++++-------------
drivers/gpu/drm/msm/adreno/a6xx_gmu.h | 55 ++++--------
2 files changed, 77 insertions(+), 102 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 378dc7f190c3..c497e0942141 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -245,47 +245,66 @@ static int a6xx_gmu_hfi_start(struct a6xx_gmu *gmu)
return ret;
}

+struct a6xx_gmu_oob_bits {
+ int set, ack, set_new, ack_new;
+ const char *name;
+};
+
+/* These are the interrupt / ack bits for each OOB request that are set
+ * in a6xx_gmu_set_oob and a6xx_clear_oob
+ */
+static const struct a6xx_gmu_oob_bits a6xx_gmu_oob_bits[] = {
+ [GMU_OOB_GPU_SET] = {
+ .name = "GPU_SET",
+ .set = 16,
+ .ack = 24,
+ .set_new = 30,
+ .ack_new = 31,
+ },
+
+ [GMU_OOB_PERFCOUNTER_SET] = {
+ .name = "PERFCOUNTER",
+ .set = 17,
+ .ack = 25,
+ .set_new = 28,
+ .ack_new = 30,
+ },
+
+ [GMU_OOB_BOOT_SLUMBER] = {
+ .name = "BOOT_SLUMBER",
+ .set = 22,
+ .ack = 30,
+ },
+
+ [GMU_OOB_DCVS_SET] = {
+ .name = "GPU_DCVS",
+ .set = 23,
+ .ack = 31,
+ },
+};
+
/* Trigger a OOB (out of band) request to the GMU */
int _a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state, char *file, int line)
{
int ret;
u32 val;
int request, ack;
- const char *name;

- switch (state) {
- case GMU_OOB_GPU_SET:
- if (gmu->legacy) {
- request = GMU_OOB_GPU_SET_REQUEST;
- ack = GMU_OOB_GPU_SET_ACK;
- } else {
- request = GMU_OOB_GPU_SET_REQUEST_NEW;
- ack = GMU_OOB_GPU_SET_ACK_NEW;
- }
- name = "GPU_SET";
- break;
- case GMU_OOB_PERFCOUNTER_SET:
- if (gmu->legacy) {
- request = GMU_OOB_PERFCOUNTER_REQUEST;
- ack = GMU_OOB_PERFCOUNTER_ACK;
- } else {
- request = GMU_OOB_PERFCOUNTER_REQUEST_NEW;
- ack = GMU_OOB_PERFCOUNTER_ACK_NEW;
- }
- name = "PERFCOUNTER";
- break;
- case GMU_OOB_BOOT_SLUMBER:
- request = GMU_OOB_BOOT_SLUMBER_REQUEST;
- ack = GMU_OOB_BOOT_SLUMBER_ACK;
- name = "BOOT_SLUMBER";
- break;
- case GMU_OOB_DCVS_SET:
- request = GMU_OOB_DCVS_REQUEST;
- ack = GMU_OOB_DCVS_ACK;
- name = "GPU_DCVS";
- break;
- default:
+ if (state >= ARRAY_SIZE(a6xx_gmu_oob_bits))
return -EINVAL;
+
+ if (gmu->legacy) {
+ request = a6xx_gmu_oob_bits[state].set;
+ ack = a6xx_gmu_oob_bits[state].ack;
+ } else {
+ request = a6xx_gmu_oob_bits[state].set_new;
+ ack = a6xx_gmu_oob_bits[state].ack_new;
+ if (!request || !ack) {
+ DRM_DEV_ERROR(gmu->dev,
+ "Invalid non-legacy GMU request %s\n",
+ a6xx_gmu_oob_bits[state].name);
+ return -EINVAL;
+ }
}

/* Trigger the equested OOB operation */
@@ -299,7 +318,7 @@ int _a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state, char
DRM_DEV_ERROR(gmu->dev,
"%s:%d Timeout waiting for GMU OOB set %s: 0x%x\n",
file, line,
- name,
+ a6xx_gmu_oob_bits[state].name,
gmu_read(gmu, REG_A6XX_GMU_GMU2HOST_INTR_INFO));

/* Clear the acknowledge interrupt */
@@ -311,36 +330,17 @@ int _a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state, char
/* Clear a pending OOB state in the GMU */
void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
{
- if (!gmu->legacy) {
- if (state == GMU_OOB_GPU_SET) {
- gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
- 1 << GMU_OOB_GPU_SET_CLEAR_NEW);
- } else {
- WARN_ON(state != GMU_OOB_PERFCOUNTER_SET);
- gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
- 1 << GMU_OOB_PERFCOUNTER_CLEAR_NEW);
- }
+ int bit;
+
+ if (state >= ARRAY_SIZE(a6xx_gmu_oob_bits))
return;
- }

- switch (state) {
- case GMU_OOB_GPU_SET:
- gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
- 1 << GMU_OOB_GPU_SET_CLEAR);
- break;
- case GMU_OOB_PERFCOUNTER_SET:
- gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
- 1 << GMU_OOB_PERFCOUNTER_CLEAR);
- break;
- case GMU_OOB_BOOT_SLUMBER:
- gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
- 1 << GMU_OOB_BOOT_SLUMBER_CLEAR);
- break;
- case GMU_OOB_DCVS_SET:
- gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
- 1 << GMU_OOB_DCVS_CLEAR);
- break;
- }
+ if (gmu->legacy)
+ bit = a6xx_gmu_oob_bits[state].ack;
+ else
+ bit = a6xx_gmu_oob_bits[state].ack_new;
+
+ gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, BIT(bit));
}

/* Enable CPU control of SPTP power power collapse */
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index 9fa278de2106..71dfa60070cc 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -153,52 +153,27 @@ static inline void gmu_write_rscc(struct a6xx_gmu *gmu, u32 offset, u32 value)
*/

enum a6xx_gmu_oob_state {
+ /*
+ * Let the GMU know that a boot or slumber operation has started. The value in
+ * REG_A6XX_GMU_BOOT_SLUMBER_OPTION lets the GMU know which operation we are
+ * doing
+ */
GMU_OOB_BOOT_SLUMBER = 0,
+ /*
+ * Let the GMU know to not turn off any GPU registers while the CPU is in a
+ * critical section
+ */
GMU_OOB_GPU_SET,
+ /*
+ * Set a new power level for the GPU when the CPU is doing frequency scaling
+ */
GMU_OOB_DCVS_SET,
+ /*
+ * Used to keep the GPU on for CPU-side reads of performance counters.
+ */
GMU_OOB_PERFCOUNTER_SET,
};

-/* These are the interrupt / ack bits for each OOB request that are set
- * in a6xx_gmu_set_oob and a6xx_clear_oob
- */
-
-/*
- * Let the GMU know that a boot or slumber operation has started. The value in
- * REG_A6XX_GMU_BOOT_SLUMBER_OPTION lets the GMU know which operation we are
- * doing
- */
-#define GMU_OOB_BOOT_SLUMBER_REQUEST 22
-#define GMU_OOB_BOOT_SLUMBER_ACK 30
-#define GMU_OOB_BOOT_SLUMBER_CLEAR 30
-
-/*
- * Set a new power level for the GPU when the CPU is doing frequency scaling
- */
-#define GMU_OOB_DCVS_REQUEST 23
-#define GMU_OOB_DCVS_ACK 31
-#define GMU_OOB_DCVS_CLEAR 31
-
-/*
- * Let the GMU know to not turn off any GPU registers while the CPU is in a
- * critical section
- */
-#define GMU_OOB_GPU_SET_REQUEST 16
-#define GMU_OOB_GPU_SET_ACK 24
-#define GMU_OOB_GPU_SET_CLEAR 24
-
-#define GMU_OOB_GPU_SET_REQUEST_NEW 30
-#define GMU_OOB_GPU_SET_ACK_NEW 31
-#define GMU_OOB_GPU_SET_CLEAR_NEW 31
-
-#define GMU_OOB_PERFCOUNTER_REQUEST 17
-#define GMU_OOB_PERFCOUNTER_ACK 25
-#define GMU_OOB_PERFCOUNTER_CLEAR 25
-
-#define GMU_OOB_PERFCOUNTER_REQUEST_NEW 28
-#define GMU_OOB_PERFCOUNTER_ACK_NEW 30
-#define GMU_OOB_PERFCOUNTER_CLEAR_NEW 30
-
void a6xx_hfi_init(struct a6xx_gmu *gmu);
int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state);
void a6xx_hfi_stop(struct a6xx_gmu *gmu);
--
2.30.0

2021-01-28 00:50:52

by Eric Anholt

[permalink] [raw]
Subject: [PATCH 2/3] drm/msm: Fix races managing the OOB state for timestamp vs timestamps.

Now that we're not racing with GPU setup, also fix races of timestamps
against other timestamps. In CI, we were seeing this path trigger
timeouts on setting the GMU bit, especially on the first set of tests
right after boot (it's probably easier to lose the race than one might
think, given that we start many tests in parallel, and waiting for NFS
to page in code probably means that lots of tests hit the same point
of screen init at the same time).

Signed-off-by: Eric Anholt <[email protected]>
Cc: [email protected] # v5.9
---
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 7424a70b9d35..e8f0b5325a7f 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1175,6 +1175,9 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+ static DEFINE_MUTEX(perfcounter_oob);
+
+ mutex_lock(&perfcounter_oob);

/* Force the GPU power on so we can read this register */
a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
@@ -1183,6 +1186,7 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
REG_A6XX_RBBM_PERFCTR_CP_0_HI);

a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
+ mutex_unlock(&perfcounter_oob);
return 0;
}

--
2.30.0

2021-01-28 01:14:05

by Eric Anholt

[permalink] [raw]
Subject: [PATCH 3/3 v2] drm/msm: Clean up GMU OOB set/clear handling.

Now that the bug is fixed in the minimal way for stable, go make the
code table-driven.

Signed-off-by: Eric Anholt <[email protected]>
---

Previous version hadn't been rebased off of a bit of debug code I had,
so it wouldn't cleanly apply.

drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 124 +++++++++++++-------------
drivers/gpu/drm/msm/adreno/a6xx_gmu.h | 55 ++++--------
2 files changed, 77 insertions(+), 102 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index b3318f86aabc..9066e98eb8ef 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -245,47 +245,66 @@ static int a6xx_gmu_hfi_start(struct a6xx_gmu *gmu)
return ret;
}

+struct a6xx_gmu_oob_bits {
+ int set, ack, set_new, ack_new;
+ const char *name;
+};
+
+/* These are the interrupt / ack bits for each OOB request that are set
+ * in a6xx_gmu_set_oob and a6xx_clear_oob
+ */
+static const struct a6xx_gmu_oob_bits a6xx_gmu_oob_bits[] = {
+ [GMU_OOB_GPU_SET] = {
+ .name = "GPU_SET",
+ .set = 16,
+ .ack = 24,
+ .set_new = 30,
+ .ack_new = 31,
+ },
+
+ [GMU_OOB_PERFCOUNTER_SET] = {
+ .name = "PERFCOUNTER",
+ .set = 17,
+ .ack = 25,
+ .set_new = 28,
+ .ack_new = 30,
+ },
+
+ [GMU_OOB_BOOT_SLUMBER] = {
+ .name = "BOOT_SLUMBER",
+ .set = 22,
+ .ack = 30,
+ },
+
+ [GMU_OOB_DCVS_SET] = {
+ .name = "GPU_DCVS",
+ .set = 23,
+ .ack = 31,
+ },
+};
+
/* Trigger a OOB (out of band) request to the GMU */
int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
{
int ret;
u32 val;
int request, ack;
- const char *name;

- switch (state) {
- case GMU_OOB_GPU_SET:
- if (gmu->legacy) {
- request = GMU_OOB_GPU_SET_REQUEST;
- ack = GMU_OOB_GPU_SET_ACK;
- } else {
- request = GMU_OOB_GPU_SET_REQUEST_NEW;
- ack = GMU_OOB_GPU_SET_ACK_NEW;
- }
- name = "GPU_SET";
- break;
- case GMU_OOB_PERFCOUNTER_SET:
- if (gmu->legacy) {
- request = GMU_OOB_PERFCOUNTER_REQUEST;
- ack = GMU_OOB_PERFCOUNTER_ACK;
- } else {
- request = GMU_OOB_PERFCOUNTER_REQUEST_NEW;
- ack = GMU_OOB_PERFCOUNTER_ACK_NEW;
- }
- name = "PERFCOUNTER";
- break;
- case GMU_OOB_BOOT_SLUMBER:
- request = GMU_OOB_BOOT_SLUMBER_REQUEST;
- ack = GMU_OOB_BOOT_SLUMBER_ACK;
- name = "BOOT_SLUMBER";
- break;
- case GMU_OOB_DCVS_SET:
- request = GMU_OOB_DCVS_REQUEST;
- ack = GMU_OOB_DCVS_ACK;
- name = "GPU_DCVS";
- break;
- default:
+ if (state >= ARRAY_SIZE(a6xx_gmu_oob_bits))
return -EINVAL;
+
+ if (gmu->legacy) {
+ request = a6xx_gmu_oob_bits[state].set;
+ ack = a6xx_gmu_oob_bits[state].ack;
+ } else {
+ request = a6xx_gmu_oob_bits[state].set_new;
+ ack = a6xx_gmu_oob_bits[state].ack_new;
+ if (!request || !ack) {
+ DRM_DEV_ERROR(gmu->dev,
+ "Invalid non-legacy GMU request %s\n",
+ a6xx_gmu_oob_bits[state].name);
+ return -EINVAL;
+ }
}

/* Trigger the equested OOB operation */
@@ -298,7 +317,7 @@ int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
if (ret)
DRM_DEV_ERROR(gmu->dev,
"Timeout waiting for GMU OOB set %s: 0x%x\n",
- name,
+ a6xx_gmu_oob_bits[state].name,
gmu_read(gmu, REG_A6XX_GMU_GMU2HOST_INTR_INFO));

/* Clear the acknowledge interrupt */
@@ -310,36 +329,17 @@ int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
/* Clear a pending OOB state in the GMU */
void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
{
- if (!gmu->legacy) {
- if (state == GMU_OOB_GPU_SET) {
- gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
- 1 << GMU_OOB_GPU_SET_CLEAR_NEW);
- } else {
- WARN_ON(state != GMU_OOB_PERFCOUNTER_SET);
- gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
- 1 << GMU_OOB_PERFCOUNTER_CLEAR_NEW);
- }
+ int bit;
+
+ if (state >= ARRAY_SIZE(a6xx_gmu_oob_bits))
return;
- }

- switch (state) {
- case GMU_OOB_GPU_SET:
- gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
- 1 << GMU_OOB_GPU_SET_CLEAR);
- break;
- case GMU_OOB_PERFCOUNTER_SET:
- gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
- 1 << GMU_OOB_PERFCOUNTER_CLEAR);
- break;
- case GMU_OOB_BOOT_SLUMBER:
- gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
- 1 << GMU_OOB_BOOT_SLUMBER_CLEAR);
- break;
- case GMU_OOB_DCVS_SET:
- gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
- 1 << GMU_OOB_DCVS_CLEAR);
- break;
- }
+ if (gmu->legacy)
+ bit = a6xx_gmu_oob_bits[state].ack;
+ else
+ bit = a6xx_gmu_oob_bits[state].ack_new;
+
+ gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, BIT(bit));
}

/* Enable CPU control of SPTP power power collapse */
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index 9fa278de2106..71dfa60070cc 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -153,52 +153,27 @@ static inline void gmu_write_rscc(struct a6xx_gmu *gmu, u32 offset, u32 value)
*/

enum a6xx_gmu_oob_state {
+ /*
+ * Let the GMU know that a boot or slumber operation has started. The value in
+ * REG_A6XX_GMU_BOOT_SLUMBER_OPTION lets the GMU know which operation we are
+ * doing
+ */
GMU_OOB_BOOT_SLUMBER = 0,
+ /*
+ * Let the GMU know to not turn off any GPU registers while the CPU is in a
+ * critical section
+ */
GMU_OOB_GPU_SET,
+ /*
+ * Set a new power level for the GPU when the CPU is doing frequency scaling
+ */
GMU_OOB_DCVS_SET,
+ /*
+ * Used to keep the GPU on for CPU-side reads of performance counters.
+ */
GMU_OOB_PERFCOUNTER_SET,
};

-/* These are the interrupt / ack bits for each OOB request that are set
- * in a6xx_gmu_set_oob and a6xx_clear_oob
- */
-
-/*
- * Let the GMU know that a boot or slumber operation has started. The value in
- * REG_A6XX_GMU_BOOT_SLUMBER_OPTION lets the GMU know which operation we are
- * doing
- */
-#define GMU_OOB_BOOT_SLUMBER_REQUEST 22
-#define GMU_OOB_BOOT_SLUMBER_ACK 30
-#define GMU_OOB_BOOT_SLUMBER_CLEAR 30
-
-/*
- * Set a new power level for the GPU when the CPU is doing frequency scaling
- */
-#define GMU_OOB_DCVS_REQUEST 23
-#define GMU_OOB_DCVS_ACK 31
-#define GMU_OOB_DCVS_CLEAR 31
-
-/*
- * Let the GMU know to not turn off any GPU registers while the CPU is in a
- * critical section
- */
-#define GMU_OOB_GPU_SET_REQUEST 16
-#define GMU_OOB_GPU_SET_ACK 24
-#define GMU_OOB_GPU_SET_CLEAR 24
-
-#define GMU_OOB_GPU_SET_REQUEST_NEW 30
-#define GMU_OOB_GPU_SET_ACK_NEW 31
-#define GMU_OOB_GPU_SET_CLEAR_NEW 31
-
-#define GMU_OOB_PERFCOUNTER_REQUEST 17
-#define GMU_OOB_PERFCOUNTER_ACK 25
-#define GMU_OOB_PERFCOUNTER_CLEAR 25
-
-#define GMU_OOB_PERFCOUNTER_REQUEST_NEW 28
-#define GMU_OOB_PERFCOUNTER_ACK_NEW 30
-#define GMU_OOB_PERFCOUNTER_CLEAR_NEW 30
-
void a6xx_hfi_init(struct a6xx_gmu *gmu);
int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state);
void a6xx_hfi_stop(struct a6xx_gmu *gmu);
--
2.30.0

2021-01-28 20:46:09

by Rob Clark

[permalink] [raw]
Subject: Re: [PATCH 2/3] drm/msm: Fix races managing the OOB state for timestamp vs timestamps.

On Wed, Jan 27, 2021 at 3:39 PM Eric Anholt <[email protected]> wrote:
>
> Now that we're not racing with GPU setup, also fix races of timestamps
> against other timestamps. In CI, we were seeing this path trigger
> timeouts on setting the GMU bit, especially on the first set of tests
> right after boot (it's probably easier to lose the race than one might
> think, given that we start many tests in parallel, and waiting for NFS
> to page in code probably means that lots of tests hit the same point
> of screen init at the same time).

Could you add the error msg to the commit msg, to make it more easily
searchable?

BR,
-R

> Signed-off-by: Eric Anholt <[email protected]>
> Cc: [email protected] # v5.9
> ---
> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 7424a70b9d35..e8f0b5325a7f 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -1175,6 +1175,9 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
> {
> struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> + static DEFINE_MUTEX(perfcounter_oob);
> +
> + mutex_lock(&perfcounter_oob);
>
> /* Force the GPU power on so we can read this register */
> a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
> @@ -1183,6 +1186,7 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
> REG_A6XX_RBBM_PERFCTR_CP_0_HI);
>
> a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
> + mutex_unlock(&perfcounter_oob);
> return 0;
> }
>
> --
> 2.30.0
>

2021-01-29 23:52:02

by Jordan Crouse

[permalink] [raw]
Subject: Re: [PATCH 1/3] drm/msm: Fix race of GPU init vs timestamp power management.

On Thu, Jan 28, 2021 at 11:17:16AM -0800, Eric Anholt wrote:
> On Thu, Jan 28, 2021 at 10:52 AM Jordan Crouse <[email protected]> wrote:
> >
> > On Wed, Jan 27, 2021 at 03:39:44PM -0800, Eric Anholt wrote:
> > > We were using the same force-poweron bit in the two codepaths, so they
> > > could race to have one of them lose GPU power early.
> > >
> > > Signed-off-by: Eric Anholt <[email protected]>
> > > Cc: [email protected] # v5.9
> >
> > You can add:
> > Fixes: 4b565ca5a2cb ("drm/msm: Add A6XX device support")
> >
> > Because that was my ugly.
> >
> > Reviewed-by: Jordan Crouse <[email protected]>
>
> I only pointed it at 5.9 because it looked like it would probably
> conflict against older branches. I can add the fixes tag if you'd
> like, though.

Fair enough. It is a good bug to fix but not if there are a lot of conflicts to
deal with.

Jordan
> _______________________________________________
> dri-devel mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project