Once we push the job, the scheduler could run it and free it. So, if
we want to reference their fences, we need to grab them before then.
I haven't seen this happen in many days of conformance test runtime,
but let's still close the race.
Signed-off-by: Eric Anholt <[email protected]>
Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+")
---
drivers/gpu/drm/v3d/v3d_drv.h | 5 +++++
drivers/gpu/drm/v3d/v3d_gem.c | 8 ++++++--
2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 5042573e97f4..83c55ab6e1c0 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -204,6 +204,11 @@ struct v3d_exec_info {
*/
struct dma_fence *bin_done_fence;
+ /* Fence for when the scheduler considers the render to be
+ * done, for when the BOs reservations should be complete.
+ */
+ struct dma_fence *render_done_fence;
+
struct kref refcount;
/* This is the array of BOs that were looked up at the start of exec. */
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index e1fcbb4cd0ae..c98fbfbdb68e 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -209,7 +209,7 @@ v3d_flush_caches(struct v3d_dev *v3d)
static void
v3d_attach_object_fences(struct v3d_exec_info *exec)
{
- struct dma_fence *out_fence = &exec->render.base.s_fence->finished;
+ struct dma_fence *out_fence = exec->render_done_fence;
struct v3d_bo *bo;
int i;
@@ -409,6 +409,7 @@ v3d_exec_cleanup(struct kref *ref)
dma_fence_put(exec->render.done_fence);
dma_fence_put(exec->bin_done_fence);
+ dma_fence_put(exec->render_done_fence);
for (i = 0; i < exec->bo_count; i++)
drm_gem_object_put_unlocked(&exec->bo[i]->base);
@@ -574,6 +575,9 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
if (ret)
goto fail_unreserve;
+ exec->render_done_fence =
+ dma_fence_get(&exec->render.base.s_fence->finished);
+
kref_get(&exec->refcount); /* put by scheduler job completion */
drm_sched_entity_push_job(&exec->render.base,
&v3d_priv->sched_entity[V3D_RENDER]);
@@ -587,7 +591,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
sync_out = drm_syncobj_find(file_priv, args->out_sync);
if (sync_out) {
drm_syncobj_replace_fence(sync_out,
- &exec->render.base.s_fence->finished);
+ exec->render_done_fence);
drm_syncobj_put(sync_out);
}
--
2.18.0
Since this is UAPI, it's good to document what exactly the guarantees
we're providing are.
Signed-off-by: Eric Anholt <[email protected]>
---
include/uapi/drm/v3d_drm.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 7b6627783608..f446656d00b1 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -58,6 +58,11 @@ struct drm_v3d_submit_cl {
* coordinate shader to determine where primitives land on the screen,
* then writes out the state updates and draw calls necessary per tile
* to the tile allocation BO.
+ *
+ * This BCL will block on any previous BCL submitted on the
+ * same FD, but not on any RCL or BCLs submitted by other
+ * clients -- that is left up to the submitter to control
+ * using in_sync_bcl if necessary.
*/
__u32 bcl_start;
@@ -69,6 +74,11 @@ struct drm_v3d_submit_cl {
* This is the second set of commands executed, which will either
* execute the tiles that have been set up by the BCL, or a fixed set
* of tiles (in the case of RCL-only blits).
+ *
+ * This RCL will block on this submit's BCL, and any previous
+ * RCL submitted on the same FD, but not on any RCL or BCLs
+ * submitted by other clients -- that is left up to the
+ * submitter to control using in_sync_rcl if necessary.
*/
__u32 rcl_start;
--
2.18.0
Fixes an oops reading this debugfs entry on BCM7278.
Signed-off-by: Eric Anholt <[email protected]>
---
drivers/gpu/drm/v3d/v3d_debugfs.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
index d48008adb085..eb2b2d2f8553 100644
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -71,10 +71,13 @@ static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused)
V3D_READ(v3d_hub_reg_defs[i].reg));
}
- for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) {
- seq_printf(m, "%s (0x%04x): 0x%08x\n",
- v3d_gca_reg_defs[i].name, v3d_gca_reg_defs[i].reg,
- V3D_GCA_READ(v3d_gca_reg_defs[i].reg));
+ if (v3d->ver < 41) {
+ for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) {
+ seq_printf(m, "%s (0x%04x): 0x%08x\n",
+ v3d_gca_reg_defs[i].name,
+ v3d_gca_reg_defs[i].reg,
+ V3D_GCA_READ(v3d_gca_reg_defs[i].reg));
+ }
}
for (core = 0; core < v3d->cores; core++) {
--
2.18.0
This adds just enough performance counter support to measure the
clock. We don't have linux kernel drivers for the clock driving the
HW, and this was useful for determining that the V3D HW is running on
a slow clock, not that the driver was slow.
Signed-off-by: Eric Anholt <[email protected]>
---
drivers/gpu/drm/v3d/v3d_debugfs.c | 35 +++++++++++++++++++++++++++++++
drivers/gpu/drm/v3d/v3d_regs.h | 30 ++++++++++++++++++++++++++
2 files changed, 65 insertions(+)
diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
index 4db62c545748..d48008adb085 100644
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -176,9 +176,44 @@ static int v3d_debugfs_bo_stats(struct seq_file *m, void *unused)
return 0;
}
+static int v3d_measure_clock(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ uint32_t cycles;
+ int core = 0;
+ int measure_ms = 1000;
+
+ if (v3d->ver >= 40) {
+ V3D_CORE_WRITE(core, V3D_V4_PCTR_0_SRC_0_3,
+ V3D_SET_FIELD(V3D_PCTR_CYCLE_COUNT,
+ V3D_PCTR_S0));
+ V3D_CORE_WRITE(core, V3D_V4_PCTR_0_CLR, 1);
+ V3D_CORE_WRITE(core, V3D_V4_PCTR_0_EN, 1);
+ } else {
+ V3D_CORE_WRITE(core, V3D_V3_PCTR_0_PCTRS0,
+ V3D_PCTR_CYCLE_COUNT);
+ V3D_CORE_WRITE(core, V3D_V3_PCTR_0_CLR, 1);
+ V3D_CORE_WRITE(core, V3D_V3_PCTR_0_EN,
+ V3D_V3_PCTR_0_EN_ENABLE |
+ 1);
+ }
+ msleep(measure_ms);
+ cycles = V3D_CORE_READ(core, V3D_PCTR_0_PCTR0);
+
+ seq_printf(m, "cycles: %d (%d.%d Mhz)\n",
+ cycles,
+ cycles / (measure_ms * 1000),
+ (cycles / (measure_ms * 100)) % 10);
+
+ return 0;
+}
+
static const struct drm_info_list v3d_debugfs_list[] = {
{"v3d_ident", v3d_v3d_debugfs_ident, 0},
{"v3d_regs", v3d_v3d_debugfs_regs, 0},
+ {"measure_clock", v3d_measure_clock, 0},
{"bo_stats", v3d_debugfs_bo_stats, 0},
};
diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h
index 854046565989..c3a5e4e44f73 100644
--- a/drivers/gpu/drm/v3d/v3d_regs.h
+++ b/drivers/gpu/drm/v3d/v3d_regs.h
@@ -267,6 +267,36 @@
# define V3D_PTB_BXCF_RWORDERDISA BIT(1)
# define V3D_PTB_BXCF_CLIPDISA BIT(0)
+#define V3D_V3_PCTR_0_EN 0x00674
+#define V3D_V3_PCTR_0_EN_ENABLE BIT(31)
+#define V3D_V4_PCTR_0_EN 0x00650
+/* When a bit is set, resets the counter to 0. */
+#define V3D_V3_PCTR_0_CLR 0x00670
+#define V3D_V4_PCTR_0_CLR 0x00654
+#define V3D_PCTR_0_OVERFLOW 0x00658
+
+#define V3D_V3_PCTR_0_PCTRS0 0x00684
+#define V3D_V3_PCTR_0_PCTRS15 0x00660
+#define V3D_V3_PCTR_0_PCTRSX(x) (V3D_V3_PCTR_0_PCTRS0 + \
+ 4 * (x))
+/* Each src reg muxes four counters each. */
+#define V3D_V4_PCTR_0_SRC_0_3 0x00660
+#define V3D_V4_PCTR_0_SRC_28_31 0x0067c
+# define V3D_PCTR_S0_MASK V3D_MASK(6, 0)
+# define V3D_PCTR_S0_SHIFT 0
+# define V3D_PCTR_S1_MASK V3D_MASK(14, 8)
+# define V3D_PCTR_S1_SHIFT 8
+# define V3D_PCTR_S2_MASK V3D_MASK(22, 16)
+# define V3D_PCTR_S2_SHIFT 16
+# define V3D_PCTR_S3_MASK V3D_MASK(30, 24)
+# define V3D_PCTR_S3_SHIFT 24
+# define V3D_PCTR_CYCLE_COUNT 32
+
+/* Output values of the counters. */
+#define V3D_PCTR_0_PCTR0 0x00680
+#define V3D_PCTR_0_PCTR31 0x006fc
+#define V3D_PCTR_0_PCTRX(x) (V3D_PCTR_0_PCTR0 + \
+ 4 * (x))
#define V3D_GMP_STATUS 0x00800
# define V3D_GMP_STATUS_GMPRST BIT(31)
# define V3D_GMP_STATUS_WR_COUNT_MASK V3D_MASK(30, 24)
--
2.18.0
On Fri, 28 Sep 2018 16:21:23 -0700
Eric Anholt <[email protected]> wrote:
> Once we push the job, the scheduler could run it and free it. So, if
> we want to reference their fences, we need to grab them before then.
> I haven't seen this happen in many days of conformance test runtime,
> but let's still close the race.
>
> Signed-off-by: Eric Anholt <[email protected]>
> Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+")
Reviewed-by: Boris Brezillon <[email protected]>
> ---
> drivers/gpu/drm/v3d/v3d_drv.h | 5 +++++
> drivers/gpu/drm/v3d/v3d_gem.c | 8 ++++++--
> 2 files changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
> index 5042573e97f4..83c55ab6e1c0 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.h
> +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> @@ -204,6 +204,11 @@ struct v3d_exec_info {
> */
> struct dma_fence *bin_done_fence;
>
> + /* Fence for when the scheduler considers the render to be
> + * done, for when the BOs reservations should be complete.
> + */
> + struct dma_fence *render_done_fence;
> +
> struct kref refcount;
>
> /* This is the array of BOs that were looked up at the start of exec. */
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index e1fcbb4cd0ae..c98fbfbdb68e 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -209,7 +209,7 @@ v3d_flush_caches(struct v3d_dev *v3d)
> static void
> v3d_attach_object_fences(struct v3d_exec_info *exec)
> {
> - struct dma_fence *out_fence = &exec->render.base.s_fence->finished;
> + struct dma_fence *out_fence = exec->render_done_fence;
> struct v3d_bo *bo;
> int i;
>
> @@ -409,6 +409,7 @@ v3d_exec_cleanup(struct kref *ref)
> dma_fence_put(exec->render.done_fence);
>
> dma_fence_put(exec->bin_done_fence);
> + dma_fence_put(exec->render_done_fence);
>
> for (i = 0; i < exec->bo_count; i++)
> drm_gem_object_put_unlocked(&exec->bo[i]->base);
> @@ -574,6 +575,9 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
> if (ret)
> goto fail_unreserve;
>
> + exec->render_done_fence =
> + dma_fence_get(&exec->render.base.s_fence->finished);
> +
> kref_get(&exec->refcount); /* put by scheduler job completion */
> drm_sched_entity_push_job(&exec->render.base,
> &v3d_priv->sched_entity[V3D_RENDER]);
> @@ -587,7 +591,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
> sync_out = drm_syncobj_find(file_priv, args->out_sync);
> if (sync_out) {
> drm_syncobj_replace_fence(sync_out,
> - &exec->render.base.s_fence->finished);
> + exec->render_done_fence);
> drm_syncobj_put(sync_out);
> }
>
On Fri, 28 Sep 2018 16:21:24 -0700
Eric Anholt <[email protected]> wrote:
> This adds just enough performance counter support to measure the
> clock. We don't have linux kernel drivers for the clock driving the
> HW, and this was useful for determining that the V3D HW is running on
> a slow clock, not that the driver was slow.
>
> Signed-off-by: Eric Anholt <[email protected]>
Reviewed-by: Boris Brezillon <[email protected]>
> ---
> drivers/gpu/drm/v3d/v3d_debugfs.c | 35 +++++++++++++++++++++++++++++++
> drivers/gpu/drm/v3d/v3d_regs.h | 30 ++++++++++++++++++++++++++
> 2 files changed, 65 insertions(+)
>
> diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
> index 4db62c545748..d48008adb085 100644
> --- a/drivers/gpu/drm/v3d/v3d_debugfs.c
> +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
> @@ -176,9 +176,44 @@ static int v3d_debugfs_bo_stats(struct seq_file *m, void *unused)
> return 0;
> }
>
> +static int v3d_measure_clock(struct seq_file *m, void *unused)
> +{
> + struct drm_info_node *node = (struct drm_info_node *)m->private;
> + struct drm_device *dev = node->minor->dev;
> + struct v3d_dev *v3d = to_v3d_dev(dev);
> + uint32_t cycles;
> + int core = 0;
> + int measure_ms = 1000;
> +
> + if (v3d->ver >= 40) {
> + V3D_CORE_WRITE(core, V3D_V4_PCTR_0_SRC_0_3,
> + V3D_SET_FIELD(V3D_PCTR_CYCLE_COUNT,
> + V3D_PCTR_S0));
> + V3D_CORE_WRITE(core, V3D_V4_PCTR_0_CLR, 1);
> + V3D_CORE_WRITE(core, V3D_V4_PCTR_0_EN, 1);
> + } else {
> + V3D_CORE_WRITE(core, V3D_V3_PCTR_0_PCTRS0,
> + V3D_PCTR_CYCLE_COUNT);
> + V3D_CORE_WRITE(core, V3D_V3_PCTR_0_CLR, 1);
> + V3D_CORE_WRITE(core, V3D_V3_PCTR_0_EN,
> + V3D_V3_PCTR_0_EN_ENABLE |
> + 1);
> + }
> + msleep(measure_ms);
> + cycles = V3D_CORE_READ(core, V3D_PCTR_0_PCTR0);
> +
> + seq_printf(m, "cycles: %d (%d.%d Mhz)\n",
> + cycles,
> + cycles / (measure_ms * 1000),
> + (cycles / (measure_ms * 100)) % 10);
> +
> + return 0;
> +}
> +
> static const struct drm_info_list v3d_debugfs_list[] = {
> {"v3d_ident", v3d_v3d_debugfs_ident, 0},
> {"v3d_regs", v3d_v3d_debugfs_regs, 0},
> + {"measure_clock", v3d_measure_clock, 0},
> {"bo_stats", v3d_debugfs_bo_stats, 0},
> };
>
> diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h
> index 854046565989..c3a5e4e44f73 100644
> --- a/drivers/gpu/drm/v3d/v3d_regs.h
> +++ b/drivers/gpu/drm/v3d/v3d_regs.h
> @@ -267,6 +267,36 @@
> # define V3D_PTB_BXCF_RWORDERDISA BIT(1)
> # define V3D_PTB_BXCF_CLIPDISA BIT(0)
>
> +#define V3D_V3_PCTR_0_EN 0x00674
> +#define V3D_V3_PCTR_0_EN_ENABLE BIT(31)
> +#define V3D_V4_PCTR_0_EN 0x00650
> +/* When a bit is set, resets the counter to 0. */
> +#define V3D_V3_PCTR_0_CLR 0x00670
> +#define V3D_V4_PCTR_0_CLR 0x00654
> +#define V3D_PCTR_0_OVERFLOW 0x00658
> +
> +#define V3D_V3_PCTR_0_PCTRS0 0x00684
> +#define V3D_V3_PCTR_0_PCTRS15 0x00660
> +#define V3D_V3_PCTR_0_PCTRSX(x) (V3D_V3_PCTR_0_PCTRS0 + \
> + 4 * (x))
> +/* Each src reg muxes four counters each. */
> +#define V3D_V4_PCTR_0_SRC_0_3 0x00660
> +#define V3D_V4_PCTR_0_SRC_28_31 0x0067c
> +# define V3D_PCTR_S0_MASK V3D_MASK(6, 0)
> +# define V3D_PCTR_S0_SHIFT 0
> +# define V3D_PCTR_S1_MASK V3D_MASK(14, 8)
> +# define V3D_PCTR_S1_SHIFT 8
> +# define V3D_PCTR_S2_MASK V3D_MASK(22, 16)
> +# define V3D_PCTR_S2_SHIFT 16
> +# define V3D_PCTR_S3_MASK V3D_MASK(30, 24)
> +# define V3D_PCTR_S3_SHIFT 24
> +# define V3D_PCTR_CYCLE_COUNT 32
> +
> +/* Output values of the counters. */
> +#define V3D_PCTR_0_PCTR0 0x00680
> +#define V3D_PCTR_0_PCTR31 0x006fc
> +#define V3D_PCTR_0_PCTRX(x) (V3D_PCTR_0_PCTR0 + \
> + 4 * (x))
> #define V3D_GMP_STATUS 0x00800
> # define V3D_GMP_STATUS_GMPRST BIT(31)
> # define V3D_GMP_STATUS_WR_COUNT_MASK V3D_MASK(30, 24)
On Fri, 28 Sep 2018 16:21:25 -0700
Eric Anholt <[email protected]> wrote:
> Since this is UAPI, it's good to document what exactly the guarantees
> we're providing are.
>
> Signed-off-by: Eric Anholt <[email protected]>
Reviewed-by: Boris Brezillon <[email protected]>
> ---
> include/uapi/drm/v3d_drm.h | 10 ++++++++++
> 1 file changed, 10 insertions(+)
>
> diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
> index 7b6627783608..f446656d00b1 100644
> --- a/include/uapi/drm/v3d_drm.h
> +++ b/include/uapi/drm/v3d_drm.h
> @@ -58,6 +58,11 @@ struct drm_v3d_submit_cl {
> * coordinate shader to determine where primitives land on the screen,
> * then writes out the state updates and draw calls necessary per tile
> * to the tile allocation BO.
> + *
> + * This BCL will block on any previous BCL submitted on the
> + * same FD, but not on any RCL or BCLs submitted by other
> + * clients -- that is left up to the submitter to control
> + * using in_sync_bcl if necessary.
> */
> __u32 bcl_start;
>
> @@ -69,6 +74,11 @@ struct drm_v3d_submit_cl {
> * This is the second set of commands executed, which will either
> * execute the tiles that have been set up by the BCL, or a fixed set
> * of tiles (in the case of RCL-only blits).
> + *
> + * This RCL will block on this submit's BCL, and any previous
> + * RCL submitted on the same FD, but not on any RCL or BCLs
> + * submitted by other clients -- that is left up to the
> + * submitter to control using in_sync_rcl if necessary.
> */
> __u32 rcl_start;
>
On Fri, 28 Sep 2018 16:21:26 -0700
Eric Anholt <[email protected]> wrote:
> Fixes an oops reading this debugfs entry on BCM7278.
>
You probably want to add:
Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+")
Cc: <[email protected]>
> Signed-off-by: Eric Anholt <[email protected]>
In any case, this is
Reviewed-by: Boris Brezillon <[email protected]>
> ---
> drivers/gpu/drm/v3d/v3d_debugfs.c | 11 +++++++----
> 1 file changed, 7 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
> index d48008adb085..eb2b2d2f8553 100644
> --- a/drivers/gpu/drm/v3d/v3d_debugfs.c
> +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
> @@ -71,10 +71,13 @@ static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused)
> V3D_READ(v3d_hub_reg_defs[i].reg));
> }
>
> - for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) {
> - seq_printf(m, "%s (0x%04x): 0x%08x\n",
> - v3d_gca_reg_defs[i].name, v3d_gca_reg_defs[i].reg,
> - V3D_GCA_READ(v3d_gca_reg_defs[i].reg));
> + if (v3d->ver < 41) {
> + for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) {
> + seq_printf(m, "%s (0x%04x): 0x%08x\n",
> + v3d_gca_reg_defs[i].name,
> + v3d_gca_reg_defs[i].reg,
> + V3D_GCA_READ(v3d_gca_reg_defs[i].reg));
> + }
> }
>
> for (core = 0; core < v3d->cores; core++) {