2023-11-21 00:40:36

by Rob Clark

Subject: [PATCH v2 0/7] drm/msm/gem: drm_exec conversion

From: Rob Clark <[email protected]>

Simplify the exec path (removing a legacy optimization) and convert to
drm_exec. One drm_exec patch to allow passing in the expected # of GEM
objects to avoid re-allocation.

I'd be a bit happier if I could avoid the extra objects-table allocation
in drm_exec in the first place, but I wasn't really happy with any of
the approaches I tried for getting rid of it.
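
For context, the locking path after the conversion follows the standard
drm_exec pattern, roughly like this (a minimal sketch, not the exact
driver code; lock_objects() and its objs/nr parameters are made up for
illustration, but the drm_exec calls are the ones this series uses):

#include <drm/drm_exec.h>

static int lock_objects(struct drm_gem_object **objs, unsigned nr)
{
	struct drm_exec exec;
	int ret = 0;

	/* Pre-size the objects table (patch 6/7) so it does not need
	 * to be re-allocated as objects are added:
	 */
	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, nr);

	drm_exec_until_all_locked(&exec) {
		for (unsigned i = 0; i < nr; i++) {
			/* Lock the resv and reserve one fence slot: */
			ret = drm_exec_prepare_obj(&exec, objs[i], 1);
			/* On ww_mutex contention, unwind and restart: */
			drm_exec_retry_on_contention(&exec);
			if (ret)
				goto error;
		}
	}

	/* ... use the locked objects ... */

error:
	/* Unlocks everything and frees the table: */
	drm_exec_fini(&exec);
	return ret;
}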

v2: updates in 6/7 and other nit fixes

Rob Clark (7):
drm/msm/gem: Remove "valid" tracking
drm/msm/gem: Remove submit_unlock_unpin_bo()
drm/msm/gem: Don't queue job to sched in error cases
drm/msm/gem: Split out submit_unpin_objects() helper
drm/msm/gem: Cleanup submit_cleanup_bo()
drm/exec: Pass in initial # of objects
drm/msm/gem: Convert to drm_exec

.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 8 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 4 +-
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +-
drivers/gpu/drm/drm_exec.c | 13 +-
drivers/gpu/drm/msm/Kconfig | 1 +
drivers/gpu/drm/msm/msm_gem.h | 13 +-
drivers/gpu/drm/msm/msm_gem_submit.c | 199 +++++-------------
drivers/gpu/drm/msm/msm_ringbuffer.c | 3 +-
drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +-
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +-
drivers/gpu/drm/tests/drm_exec_test.c | 16 +-
include/drm/drm_exec.h | 2 +-
16 files changed, 92 insertions(+), 187 deletions(-)

--
2.42.0


2023-11-21 00:42:21

by Rob Clark

Subject: [PATCH v2 5/7] drm/msm/gem: Cleanup submit_cleanup_bo()

From: Rob Clark <[email protected]>

Now that it only handles unlock duty, drop the superfluous arg and
rename it.

Signed-off-by: Rob Clark <[email protected]>
---
drivers/gpu/drm/msm/msm_gem_submit.c | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index d001bf286606..603f04d851d9 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -248,14 +248,10 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit,
return ret;
}

-/* Unwind bo state, according to cleanup_flags. In the success case, only
- * the lock is dropped at the end of the submit (and active/pin ref is dropped
- * later when the submit is retired).
- */
-static void submit_cleanup_bo(struct msm_gem_submit *submit, int i,
- unsigned cleanup_flags)
+static void submit_unlock_bo(struct msm_gem_submit *submit, int i)
{
struct drm_gem_object *obj = submit->bos[i].obj;
+ unsigned cleanup_flags = BO_LOCKED;
unsigned flags = submit->bos[i].flags & cleanup_flags;

/*
@@ -304,10 +300,10 @@ static int submit_lock_objects(struct msm_gem_submit *submit)
}

for (; i >= 0; i--)
- submit_cleanup_bo(submit, i, BO_LOCKED);
+ submit_unlock_bo(submit, i);

if (slow_locked > 0)
- submit_cleanup_bo(submit, slow_locked, BO_LOCKED);
+ submit_unlock_bo(submit, slow_locked);

if (ret == -EDEADLK) {
struct drm_gem_object *obj = submit->bos[contended].obj;
@@ -533,7 +529,6 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
*/
static void submit_cleanup(struct msm_gem_submit *submit, bool error)
{
- unsigned cleanup_flags = BO_LOCKED;
unsigned i;

if (error)
@@ -541,7 +536,7 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool error)

for (i = 0; i < submit->nr_bos; i++) {
struct drm_gem_object *obj = submit->bos[i].obj;
- submit_cleanup_bo(submit, i, cleanup_flags);
+ submit_unlock_bo(submit, i);
if (error)
drm_gem_object_put(obj);
}
--
2.42.0

2023-11-21 00:42:51

by Rob Clark

Subject: [PATCH v2 3/7] drm/msm/gem: Don't queue job to sched in error cases

From: Rob Clark <[email protected]>

We shouldn't be running the job in error cases. This also avoids having
to think too hard about where the objs get unpinned (and, if necessary,
where the resv takes over tracking that the obj is busy): in error cases
unpinning always happens synchronously, and in the normal case it happens
from the scheduler job_run() callback.

Signed-off-by: Rob Clark <[email protected]>
Reviewed-by: Dmitry Baryshkov <[email protected]>
---
drivers/gpu/drm/msm/msm_gem_submit.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 2d5527dc3e1a..786b48a55309 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -946,6 +946,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
}
}

+ if (ret)
+ goto out;
+
submit_attach_object_fences(submit);

/* The scheduler owns a ref now: */
--
2.42.0

2023-11-21 00:42:52

by Rob Clark

Subject: [PATCH v2 6/7] drm/exec: Pass in initial # of objects

From: Rob Clark <[email protected]>

In cases where the # is known ahead of time, it is silly to do the table
resize dance.
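
For example (a sketch; the submit->nr_bos caller comes from patch 7/7,
and existing callers just pass 0 to keep the previous behavior):

	/* Before: always start with a PAGE_SIZE table, grow on demand */
	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);

	/* After: size the table for a known object count up front;
	 * passing 0 falls back to the old PAGE_SIZE default:
	 */
	drm_exec_init(&submit->exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
		      submit->nr_bos);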

Signed-off-by: Rob Clark <[email protected]>
Reviewed-by: Christian König <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 8 ++++----
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 4 ++--
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +-
drivers/gpu/drm/drm_exec.c | 13 ++++++++++---
drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +-
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +-
drivers/gpu/drm/tests/drm_exec_test.c | 16 ++++++++--------
include/drm/drm_exec.h | 2 +-
12 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 41fbc4fd0fac..0bd3c4a6267a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1137,7 +1137,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,

ctx->n_vms = 1;
ctx->sync = &mem->sync;
- drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&ctx->exec) {
ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
drm_exec_retry_on_contention(&ctx->exec);
@@ -1176,7 +1176,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
int ret;

ctx->sync = &mem->sync;
- drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&ctx->exec) {
ctx->n_vms = 0;
list_for_each_entry(entry, &mem->attachments, list) {
@@ -2552,7 +2552,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)

amdgpu_sync_create(&sync);

- drm_exec_init(&exec, 0);
+ drm_exec_init(&exec, 0, 0);
/* Reserve all BOs and page tables for validation */
drm_exec_until_all_locked(&exec) {
/* Reserve all the page directories */
@@ -2793,7 +2793,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)

mutex_lock(&process_info->lock);

- drm_exec_init(&exec, 0);
+ drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index df3ecfa9e13f..2464606494d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -66,7 +66,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,

amdgpu_sync_create(&p->sync);
drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES);
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
return 0;
}

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 720011019741..796fa6f1420b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -70,7 +70,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct drm_exec exec;
int r;

- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&exec) {
r = amdgpu_vm_lock_pd(vm, &exec, 0);
if (likely(!r))
@@ -110,7 +110,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct drm_exec exec;
int r;

- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&exec) {
r = amdgpu_vm_lock_pd(vm, &exec, 0);
if (likely(!r))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 84beeaa4d21c..49a5f1c73b3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -203,7 +203,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_exec exec;
long r;

- drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES);
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1);
drm_exec_retry_on_contention(&exec);
@@ -739,7 +739,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
}

drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES);
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
if (gobj) {
r = drm_exec_lock_obj(&exec, gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 9ddbf1494326..abd0b9763904 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1122,7 +1122,7 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,

amdgpu_sync_create(&sync);

- drm_exec_init(&exec, 0);
+ drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
r = drm_exec_lock_obj(&exec,
&ctx_data->meta_data_obj->tbo.base);
@@ -1193,7 +1193,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
struct drm_exec exec;
long r;

- drm_exec_init(&exec, 0);
+ drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
r = drm_exec_lock_obj(&exec,
&ctx_data->meta_data_obj->tbo.base);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
index ca45ba8ac171..bfbf59326ee1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
@@ -86,7 +86,7 @@ static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm,

amdgpu_sync_create(&sync);

- drm_exec_init(&exec, 0);
+ drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
r = drm_exec_lock_obj(&exec, &bo->tbo.base);
drm_exec_retry_on_contention(&exec);
@@ -149,7 +149,7 @@ static int unmap_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct drm_exec exec;
long r;

- drm_exec_init(&exec, 0);
+ drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
r = drm_exec_lock_obj(&exec, &bo->tbo.base);
drm_exec_retry_on_contention(&exec);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f2f3c338fd94..76d9f14ccc7c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1529,7 +1529,7 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr)
uint32_t gpuidx;
int r;

- drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0);
+ drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0, 0);
drm_exec_until_all_locked(&ctx->exec) {
for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
index 5d2809de4517..48ee851b61d9 100644
--- a/drivers/gpu/drm/drm_exec.c
+++ b/drivers/gpu/drm/drm_exec.c
@@ -69,16 +69,23 @@ static void drm_exec_unlock_all(struct drm_exec *exec)
* drm_exec_init - initialize a drm_exec object
* @exec: the drm_exec object to initialize
* @flags: controls locking behavior, see DRM_EXEC_* defines
+ * @nr: the initial # of objects
*
* Initialize the object and make sure that we can track locked objects.
+ *
+ * If nr is non-zero then it is used as the initial objects table size.
+ * In either case, the table will grow (be re-allocated) on demand.
*/
-void drm_exec_init(struct drm_exec *exec, uint32_t flags)
+void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr)
{
+ if (!nr)
+ nr = PAGE_SIZE / sizeof(void *);
+
exec->flags = flags;
- exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ exec->objects = kvmalloc_array(nr, sizeof(void *), GFP_KERNEL);

/* If allocation here fails, just delay that till the first use */
- exec->max_objects = exec->objects ? PAGE_SIZE / sizeof(void *) : 0;
+ exec->max_objects = exec->objects ? nr : 0;
exec->num_objects = 0;
exec->contended = DRM_EXEC_DUMMY;
exec->prelocked = NULL;
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
index 9a5ef574744b..769392276e4f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -103,7 +103,7 @@ nouveau_exec_job_submit(struct nouveau_job *job)

nouveau_uvmm_lock(uvmm);
drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES);
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(exec) {
struct drm_gpuva *va;

diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 5cf892c50f43..f93cfccc2c93 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1288,7 +1288,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
}

drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES);
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(exec) {
list_for_each_op(op, &bind_job->ops) {
struct drm_gpuva_op *va_op;
diff --git a/drivers/gpu/drm/tests/drm_exec_test.c b/drivers/gpu/drm/tests/drm_exec_test.c
index 563949d777dd..81f928a429ba 100644
--- a/drivers/gpu/drm/tests/drm_exec_test.c
+++ b/drivers/gpu/drm/tests/drm_exec_test.c
@@ -46,7 +46,7 @@ static void sanitycheck(struct kunit *test)
{
struct drm_exec exec;

- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_fini(&exec);
KUNIT_SUCCEED(test);
}
@@ -60,7 +60,7 @@ static void test_lock(struct kunit *test)

drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE);

- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&exec) {
ret = drm_exec_lock_obj(&exec, &gobj);
drm_exec_retry_on_contention(&exec);
@@ -80,7 +80,7 @@ static void test_lock_unlock(struct kunit *test)

drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE);

- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&exec) {
ret = drm_exec_lock_obj(&exec, &gobj);
drm_exec_retry_on_contention(&exec);
@@ -107,7 +107,7 @@ static void test_duplicates(struct kunit *test)

drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE);

- drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES);
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
ret = drm_exec_lock_obj(&exec, &gobj);
drm_exec_retry_on_contention(&exec);
@@ -134,7 +134,7 @@ static void test_prepare(struct kunit *test)

drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE);

- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&exec) {
ret = drm_exec_prepare_obj(&exec, &gobj, 1);
drm_exec_retry_on_contention(&exec);
@@ -159,7 +159,7 @@ static void test_prepare_array(struct kunit *test)
drm_gem_private_object_init(priv->drm, &gobj1, PAGE_SIZE);
drm_gem_private_object_init(priv->drm, &gobj2, PAGE_SIZE);

- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&exec)
ret = drm_exec_prepare_array(&exec, array, ARRAY_SIZE(array),
1);
@@ -174,14 +174,14 @@ static void test_multiple_loops(struct kunit *test)
{
struct drm_exec exec;

- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&exec)
{
break;
}
drm_exec_fini(&exec);

- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&exec)
{
break;
diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h
index b5bf0b6da791..f1a66c048721 100644
--- a/include/drm/drm_exec.h
+++ b/include/drm/drm_exec.h
@@ -135,7 +135,7 @@ static inline bool drm_exec_is_contended(struct drm_exec *exec)
return !!exec->contended;
}

-void drm_exec_init(struct drm_exec *exec, uint32_t flags);
+void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr);
void drm_exec_fini(struct drm_exec *exec);
bool drm_exec_cleanup(struct drm_exec *exec);
int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj);
--
2.42.0

2023-11-21 00:43:05

by Rob Clark

Subject: [PATCH v2 7/7] drm/msm/gem: Convert to drm_exec

From: Rob Clark <[email protected]>

Replace the ww_mutex locking dance with the drm_exec helper.

v2: Error path fixes; move drm_exec_fini() so we only call it once (and
only if we have called drm_exec_init())
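
The one subtlety is the cleanup path: drm_exec_fini() must only run if
drm_exec_init() was actually reached. A sketch of the guard used in
submit_cleanup() below (assuming the submit object is zero-initialized
on allocation, so exec.objects stays NULL until drm_exec_init() runs):

	/* exec.objects doubles as the "exec was initialized" marker: */
	if (submit->exec.objects)
		drm_exec_fini(&submit->exec);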

Signed-off-by: Rob Clark <[email protected]>
---
drivers/gpu/drm/msm/Kconfig | 1 +
drivers/gpu/drm/msm/msm_gem.h | 5 +-
drivers/gpu/drm/msm/msm_gem_submit.c | 119 +++++----------------------
3 files changed, 24 insertions(+), 101 deletions(-)

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 6309a857ca31..f91d87afc0d3 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -16,6 +16,7 @@ config DRM_MSM
select DRM_DP_AUX_BUS
select DRM_DISPLAY_DP_HELPER
select DRM_DISPLAY_HELPER
+ select DRM_EXEC
select DRM_KMS_HELPER
select DRM_PANEL
select DRM_BRIDGE
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index af884ced7a0d..7f34263048a3 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -9,6 +9,7 @@

#include <linux/kref.h>
#include <linux/dma-resv.h>
+#include "drm/drm_exec.h"
#include "drm/gpu_scheduler.h"
#include "msm_drv.h"

@@ -254,7 +255,7 @@ struct msm_gem_submit {
struct msm_gpu *gpu;
struct msm_gem_address_space *aspace;
struct list_head node; /* node in ring submit list */
- struct ww_acquire_ctx ticket;
+ struct drm_exec exec;
uint32_t seqno; /* Sequence number of the submit on the ring */

/* Hw fence, which is created when the scheduler executes the job, and
@@ -287,8 +288,6 @@ struct msm_gem_submit {
struct drm_msm_gem_submit_reloc *relocs;
} *cmd; /* array of size nr_cmds */
struct {
-/* make sure these don't conflict w/ MSM_SUBMIT_BO_x */
-#define BO_LOCKED 0x4000 /* obj lock is held */
uint32_t flags;
union {
struct drm_gem_object *obj;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 603f04d851d9..40878c26a749 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -248,85 +248,30 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit,
return ret;
}

-static void submit_unlock_bo(struct msm_gem_submit *submit, int i)
-{
- struct drm_gem_object *obj = submit->bos[i].obj;
- unsigned cleanup_flags = BO_LOCKED;
- unsigned flags = submit->bos[i].flags & cleanup_flags;
-
- /*
- * Clear flags bit before dropping lock, so that the msm_job_run()
- * path isn't racing with submit_cleanup() (ie. the read/modify/
- * write is protected by the obj lock in all paths)
- */
- submit->bos[i].flags &= ~cleanup_flags;
-
- if (flags & BO_LOCKED)
- dma_resv_unlock(obj->resv);
-}
-
/* This is where we make sure all the bo's are reserved and pin'd: */
static int submit_lock_objects(struct msm_gem_submit *submit)
{
- int contended, slow_locked = -1, i, ret = 0;
-
-retry:
- for (i = 0; i < submit->nr_bos; i++) {
- struct drm_gem_object *obj = submit->bos[i].obj;
-
- if (slow_locked == i)
- slow_locked = -1;
+ int ret;

- contended = i;
+ drm_exec_init(&submit->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, submit->nr_bos);

- if (!(submit->bos[i].flags & BO_LOCKED)) {
- ret = dma_resv_lock_interruptible(obj->resv,
- &submit->ticket);
+ drm_exec_until_all_locked (&submit->exec) {
+ for (unsigned i = 0; i < submit->nr_bos; i++) {
+ struct drm_gem_object *obj = submit->bos[i].obj;
+ ret = drm_exec_prepare_obj(&submit->exec, obj, 1);
+ drm_exec_retry_on_contention(&submit->exec);
if (ret)
- goto fail;
- submit->bos[i].flags |= BO_LOCKED;
+ goto error;
}
}

- ww_acquire_done(&submit->ticket);
-
return 0;

-fail:
- if (ret == -EALREADY) {
- SUBMIT_ERROR(submit, "handle %u at index %u already on submit list\n",
- submit->bos[i].handle, i);
- ret = -EINVAL;
- }
-
- for (; i >= 0; i--)
- submit_unlock_bo(submit, i);
-
- if (slow_locked > 0)
- submit_unlock_bo(submit, slow_locked);
-
- if (ret == -EDEADLK) {
- struct drm_gem_object *obj = submit->bos[contended].obj;
- /* we lost out in a seqno race, lock and retry.. */
- ret = dma_resv_lock_slow_interruptible(obj->resv,
- &submit->ticket);
- if (!ret) {
- submit->bos[contended].flags |= BO_LOCKED;
- slow_locked = contended;
- goto retry;
- }
-
- /* Not expecting -EALREADY here, if the bo was already
- * locked, we should have gotten -EALREADY already from
- * the dma_resv_lock_interruptable() call.
- */
- WARN_ON_ONCE(ret == -EALREADY);
- }
-
+error:
return ret;
}

-static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
+static int submit_fence_sync(struct msm_gem_submit *submit)
{
int i, ret = 0;

@@ -334,22 +279,6 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
struct drm_gem_object *obj = submit->bos[i].obj;
bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE;

- /* NOTE: _reserve_shared() must happen before
- * _add_shared_fence(), which makes this a slightly
- * strange place to call it. OTOH this is a
- * convenient can-fail point to hook it in.
- */
- ret = dma_resv_reserve_fences(obj->resv, 1);
- if (ret)
- return ret;
-
- /* If userspace has determined that explicit fencing is
- * used, it can disable implicit sync on the entire
- * submit:
- */
- if (no_implicit)
- continue;
-
/* Otherwise userspace can ask for implicit sync to be
* disabled on specific buffers. This is useful for internal
* usermode driver managed buffers, suballocation, etc.
@@ -529,17 +458,14 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
*/
static void submit_cleanup(struct msm_gem_submit *submit, bool error)
{
- unsigned i;
-
- if (error)
+ if (error) {
submit_unpin_objects(submit);
-
- for (i = 0; i < submit->nr_bos; i++) {
- struct drm_gem_object *obj = submit->bos[i].obj;
- submit_unlock_bo(submit, i);
- if (error)
- drm_gem_object_put(obj);
+ /* job wasn't enqueued to scheduler, so early retirement: */
+ msm_submit_retire(submit);
}
+
+ if (submit->exec.objects)
+ drm_exec_fini(&submit->exec);
}

void msm_submit_retire(struct msm_gem_submit *submit)
@@ -733,7 +659,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
struct msm_submit_post_dep *post_deps = NULL;
struct drm_syncobj **syncobjs_to_reset = NULL;
int out_fence_fd = -1;
- bool has_ww_ticket = false;
unsigned i;
int ret;

@@ -839,15 +764,15 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
goto out;

/* copy_*_user while holding a ww ticket upsets lockdep */
- ww_acquire_init(&submit->ticket, &reservation_ww_class);
- has_ww_ticket = true;
ret = submit_lock_objects(submit);
if (ret)
goto out;

- ret = submit_fence_sync(submit, !!(args->flags & MSM_SUBMIT_NO_IMPLICIT));
- if (ret)
- goto out;
+ if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) {
+ ret = submit_fence_sync(submit);
+ if (ret)
+ goto out;
+ }

ret = submit_pin_objects(submit);
if (ret)
@@ -978,8 +903,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,

out:
submit_cleanup(submit, !!ret);
- if (has_ww_ticket)
- ww_acquire_fini(&submit->ticket);
out_unlock:
mutex_unlock(&queue->lock);
out_post_unlock:
--
2.42.0

2023-12-03 11:31:09

by Dmitry Baryshkov

Subject: Re: [PATCH v2 5/7] drm/msm/gem: Cleanup submit_cleanup_bo()

On 21/11/2023 02:38, Rob Clark wrote:
> From: Rob Clark <[email protected]>
>
> Now that it only handles unlock duty, drop the superfluous arg and
> rename it.
>
> Signed-off-by: Rob Clark <[email protected]>

Reviewed-by: Dmitry Baryshkov <[email protected]>

> ---
> drivers/gpu/drm/msm/msm_gem_submit.c | 15 +++++----------
> 1 file changed, 5 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> index d001bf286606..603f04d851d9 100644
> --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> @@ -248,14 +248,10 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit,
> return ret;
> }
>
> -/* Unwind bo state, according to cleanup_flags. In the success case, only
> - * the lock is dropped at the end of the submit (and active/pin ref is dropped
> - * later when the submit is retired).
> - */
> -static void submit_cleanup_bo(struct msm_gem_submit *submit, int i,
> - unsigned cleanup_flags)
> +static void submit_unlock_bo(struct msm_gem_submit *submit, int i)
> {
> struct drm_gem_object *obj = submit->bos[i].obj;
> + unsigned cleanup_flags = BO_LOCKED;

Nit: checkpatch will warn here, it should be unsigned int.

> unsigned flags = submit->bos[i].flags & cleanup_flags;
>
> /*
> @@ -304,10 +300,10 @@ static int submit_lock_objects(struct msm_gem_submit *submit)
> }
>
> for (; i >= 0; i--)
> - submit_cleanup_bo(submit, i, BO_LOCKED);
> + submit_unlock_bo(submit, i);
>
> if (slow_locked > 0)
> - submit_cleanup_bo(submit, slow_locked, BO_LOCKED);
> + submit_unlock_bo(submit, slow_locked);
>
> if (ret == -EDEADLK) {
> struct drm_gem_object *obj = submit->bos[contended].obj;
> @@ -533,7 +529,6 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
> */
> static void submit_cleanup(struct msm_gem_submit *submit, bool error)
> {
> - unsigned cleanup_flags = BO_LOCKED;
> unsigned i;
>
> if (error)
> @@ -541,7 +536,7 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool error)
>
> for (i = 0; i < submit->nr_bos; i++) {
> struct drm_gem_object *obj = submit->bos[i].obj;
> - submit_cleanup_bo(submit, i, cleanup_flags);
> + submit_unlock_bo(submit, i);
> if (error)
> drm_gem_object_put(obj);
> }

--
With best wishes
Dmitry