Memory barriers help ensure instruction ordering, NOT the time or order
in which writes actually arrive at other observers (e.g. memory-mapped IP).
On architectures employing weak memory ordering, the latter can be a
giant pain point, and it has proven to be one in this driver.
Moreover, the gpu_/gmu_ accessors already use the non-relaxed versions
of readl/writel, which include read and write barriers, respectively.
Replace the barriers with a readback, which guarantees that the previous
writes have left the write buffer (the CPU must flush the write before
it can read the same register back), and subsequently remove the hack
introduced in commit b77532803d11 ("drm/msm/a6xx: Poll for GBIF unhalt
status in hw_init").
Fixes: b77532803d11 ("drm/msm/a6xx: Poll for GBIF unhalt status in hw_init")
Signed-off-by: Konrad Dybcio <[email protected]>
---
Changes in v2:
* Introduce gpu_write_flush() and use it
* Don't accidentally break a630 by trying to write to non-existent GBIF
---
drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 4 +---
drivers/gpu/drm/msm/adreno/a6xx_gmu.h | 10 ++++++++++
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 16 ++++------------
drivers/gpu/drm/msm/msm_gpu.h | 14 ++++++++++++--
4 files changed, 27 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 0e3dfd4c2bc8..fb2f8a03da41 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -466,9 +466,7 @@ static int a6xx_rpmh_start(struct a6xx_gmu *gmu)
int ret;
u32 val;
- gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 1 << 1);
- /* Wait for the register to finish posting */
- wmb();
+ gmu_write_flush(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, BIT(1));
ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_RSCC_CONTROL_ACK, val,
val & (1 << 1), 100, 10000);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index 94b6c5cab6f4..ab7f581f0d24 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -111,6 +111,16 @@ static inline void gmu_write(struct a6xx_gmu *gmu, u32 offset, u32 value)
writel(value, gmu->mmio + (offset << 2));
}
+/*
+ * Use for timing-critical writes that must reach the hardware immediately
+ * (to work around write buffering), e.g. for reset registers.
+ */
+static inline void gmu_write_flush(struct a6xx_gmu *gmu, u32 offset, u32 value)
+{
+ gmu_write(gmu, offset, value);
+ gmu_read(gmu, offset);
+}
+
static inline void
gmu_write_bulk(struct a6xx_gmu *gmu, u32 offset, const u32 *data, u32 size)
{
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index cf0b1de1c071..ef7eaa6d5e5c 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1714,21 +1714,13 @@ static int hw_init(struct msm_gpu *gpu)
/* Clear GBIF halt in case GX domain was not collapsed */
if (adreno_is_a619_holi(adreno_gpu)) {
- gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
- gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0);
- /* Let's make extra sure that the GPU can access the memory.. */
- mb();
+ gpu_write_flush(gpu, REG_A6XX_GBIF_HALT, 0);
+ gpu_write_flush(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0);
} else if (a6xx_has_gbif(adreno_gpu)) {
- gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
- gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
- /* Let's make extra sure that the GPU can access the memory.. */
- mb();
+ gpu_write_flush(gpu, REG_A6XX_GBIF_HALT, 0);
+ gpu_write_flush(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
}
- /* Some GPUs are stubborn and take their sweet time to unhalt GBIF! */
- if (adreno_is_a7xx(adreno_gpu) && a6xx_has_gbif(adreno_gpu))
- spin_until(!gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK));
-
gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
if (adreno_is_a619_holi(adreno_gpu))
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index a0c1bd6d1d5b..45d00acd5b1b 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -553,14 +553,24 @@ struct msm_gpu_state {
struct msm_gpu_state_bo *bos;
};
+static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
+{
+ return readl(gpu->mmio + (reg << 2));
+}
+
static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
{
writel(data, gpu->mmio + (reg << 2));
}
-static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
+/*
+ * Use for timing-critical writes that must reach the hardware immediately
+ * (to work around write buffering), e.g. for reset registers.
+ */
+static inline void gpu_write_flush(struct msm_gpu *gpu, u32 reg, u32 data)
{
- return readl(gpu->mmio + (reg << 2));
+ gpu_write(gpu, reg, data);
+ gpu_read(gpu, reg);
}
static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
---
base-commit: ec2d9beb604a623a9f5308f7abcff3561e08c155
change-id: 20240509-topic-adreno-a8939b92f625
Best regards,
--
Konrad Dybcio <[email protected]>