2023-10-27 17:00:04

by Rob Clark

[permalink] [raw]
Subject: [PATCH 0/7] drm/msm/gem: drm_exec conversion

From: Rob Clark <[email protected]>

Simplify the exec path (removing a legacy optimization) and convert to
drm_exec. One drm_exec patch to allow passing in the expected # of GEM
objects to avoid re-allocation.

I'd be a bit happier if I could avoid the extra objects table allocation
in drm_exec in the first place, but wasn't really happy with any of the
things I tried to get rid of that.

Rob Clark (7):
drm/msm/gem: Remove "valid" tracking
drm/msm/gem: Remove submit_unlock_unpin_bo()
drm/msm/gem: Don't queue job to sched in error cases
drm/msm/gem: Split out submit_unpin_objects() helper
drm/msm/gem: Cleanup submit_cleanup_bo()
drm/exec: Pass in initial # of objects
drm/msm/gem: Convert to drm_exec

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 +-
drivers/gpu/drm/drm_exec.c | 15 +-
drivers/gpu/drm/msm/Kconfig | 1 +
drivers/gpu/drm/msm/msm_gem.h | 13 +-
drivers/gpu/drm/msm/msm_gem_submit.c | 197 ++++++------------------
drivers/gpu/drm/msm/msm_ringbuffer.c | 3 +-
drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +-
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +-
include/drm/drm_exec.h | 2 +-
12 files changed, 79 insertions(+), 170 deletions(-)

--
2.41.0


2023-10-27 17:00:04

by Rob Clark

[permalink] [raw]
Subject: [PATCH 1/7] drm/msm/gem: Remove "valid" tracking

From: Rob Clark <[email protected]>

This was a small optimization for pre-soft-pin userspace. But mesa
switched to soft-pin nearly 5yrs ago. So lets drop the optimization
and simplify the code.

Signed-off-by: Rob Clark <[email protected]>
---
drivers/gpu/drm/msm/msm_gem.h | 2 --
drivers/gpu/drm/msm/msm_gem_submit.c | 44 +++++-----------------------
2 files changed, 8 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 8ddef5443140..c36c1c1fa222 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -271,7 +271,6 @@ struct msm_gem_submit {
struct msm_gpu_submitqueue *queue;
struct pid *pid; /* submitting process */
bool fault_dumped; /* Limit devcoredump dumping to one per submit */
- bool valid; /* true if no cmdstream patching needed */
bool in_rb; /* "sudo" mode, copy cmds into RB */
struct msm_ringbuffer *ring;
unsigned int nr_cmds;
@@ -288,7 +287,6 @@ struct msm_gem_submit {
} *cmd; /* array of size nr_cmds */
struct {
/* make sure these don't conflict w/ MSM_SUBMIT_BO_x */
-#define BO_VALID 0x8000 /* is current addr in cmdstream correct/valid? */
#define BO_LOCKED 0x4000 /* obj lock is held */
#define BO_PINNED 0x2000 /* obj (pages) is pinned and on active list */
uint32_t flags;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 6d8ec1337e8b..996274ef32a6 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -150,8 +150,6 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,

submit->bos[i].handle = submit_bo.handle;
submit->bos[i].flags = submit_bo.flags;
- /* in validate_objects() we figure out if this is true: */
- submit->bos[i].iova = submit_bo.presumed;
}

spin_lock(&file->table_lock);
@@ -278,9 +276,6 @@ static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i)
{
unsigned cleanup_flags = BO_PINNED | BO_LOCKED;
submit_cleanup_bo(submit, i, cleanup_flags);
-
- if (!(submit->bos[i].flags & BO_VALID))
- submit->bos[i].iova = 0;
}

/* This is where we make sure all the bo's are reserved and pin'd: */
@@ -390,8 +385,6 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
struct msm_drm_private *priv = submit->dev->dev_private;
int i, ret = 0;

- submit->valid = true;
-
for (i = 0; i < submit->nr_bos; i++) {
struct drm_gem_object *obj = submit->bos[i].obj;
struct msm_gem_vma *vma;
@@ -407,14 +400,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
if (ret)
break;

- if (vma->iova == submit->bos[i].iova) {
- submit->bos[i].flags |= BO_VALID;
- } else {
- submit->bos[i].iova = vma->iova;
- /* iova changed, so address in cmdstream is not valid: */
- submit->bos[i].flags &= ~BO_VALID;
- submit->valid = false;
- }
+ submit->bos[i].iova = vma->iova;
}

/*
@@ -451,7 +437,7 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit)
}

static int submit_bo(struct msm_gem_submit *submit, uint32_t idx,
- struct drm_gem_object **obj, uint64_t *iova, bool *valid)
+ struct drm_gem_object **obj, uint64_t *iova)
{
if (idx >= submit->nr_bos) {
SUBMIT_ERROR(submit, "invalid buffer index: %u (out of %u)\n",
@@ -463,8 +449,6 @@ static int submit_bo(struct msm_gem_submit *submit, uint32_t idx,
*obj = submit->bos[idx].obj;
if (iova)
*iova = submit->bos[idx].iova;
- if (valid)
- *valid = !!(submit->bos[idx].flags & BO_VALID);

return 0;
}
@@ -477,9 +461,6 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
uint32_t *ptr;
int ret = 0;

- if (!nr_relocs)
- return 0;
-
if (offset % 4) {
SUBMIT_ERROR(submit, "non-aligned cmdstream buffer: %u\n", offset);
return -EINVAL;
@@ -500,7 +481,6 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
struct drm_msm_gem_submit_reloc submit_reloc = relocs[i];
uint32_t off;
uint64_t iova;
- bool valid;

if (submit_reloc.submit_offset % 4) {
SUBMIT_ERROR(submit, "non-aligned reloc offset: %u\n",
@@ -519,13 +499,10 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
goto out;
}

- ret = submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova, &valid);
+ ret = submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova);
if (ret)
goto out;

- if (valid)
- continue;
-
iova += submit_reloc.reloc_offset;

if (submit_reloc.shift < 0)
@@ -879,8 +856,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
struct drm_gem_object *obj;
uint64_t iova;

- ret = submit_bo(submit, submit->cmd[i].idx,
- &obj, &iova, NULL);
+ ret = submit_bo(submit, submit->cmd[i].idx, &obj, &iova);
if (ret)
goto out;

@@ -894,17 +870,13 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,

submit->cmd[i].iova = iova + (submit->cmd[i].offset * 4);

- if (submit->valid)
+ if (likely(!submit->cmd[i].nr_relocs))
continue;

if (!gpu->allow_relocs) {
- if (submit->cmd[i].nr_relocs) {
- SUBMIT_ERROR(submit, "relocs not allowed\n");
- ret = -EINVAL;
- goto out;
- }
-
- continue;
+ SUBMIT_ERROR(submit, "relocs not allowed\n");
+ ret = -EINVAL;
+ goto out;
}

ret = submit_reloc(submit, obj, submit->cmd[i].offset * 4,
--
2.41.0

2023-10-27 17:00:05

by Rob Clark

[permalink] [raw]
Subject: [PATCH 3/7] drm/msm/gem: Don't queue job to sched in error cases

From: Rob Clark <[email protected]>

We shouldn't be running the job in error cases. This also avoids having
to think too hard about where the objs get unpinned (and if necessary,
the resv takes over tracking that the obj is busy).. ie. error cases it
always happens synchronously, and normal cases it happens from scheduler
job_run() callback.

Signed-off-by: Rob Clark <[email protected]>
---
drivers/gpu/drm/msm/msm_gem_submit.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 2d5527dc3e1a..786b48a55309 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -946,6 +946,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
}
}

+ if (ret)
+ goto out;
+
submit_attach_object_fences(submit);

/* The scheduler owns a ref now: */
--
2.41.0

2023-10-27 17:00:07

by Rob Clark

[permalink] [raw]
Subject: [PATCH 2/7] drm/msm/gem: Remove submit_unlock_unpin_bo()

From: Rob Clark <[email protected]>

The only point it is called is before pinning objects, so the "unpin"
part of the name is fiction. Just remove call submit_cleanup_bo()
directly.

Signed-off-by: Rob Clark <[email protected]>
---
drivers/gpu/drm/msm/msm_gem_submit.c | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 996274ef32a6..2d5527dc3e1a 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -272,12 +272,6 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i,
dma_resv_unlock(obj->resv);
}

-static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i)
-{
- unsigned cleanup_flags = BO_PINNED | BO_LOCKED;
- submit_cleanup_bo(submit, i, cleanup_flags);
-}
-
/* This is where we make sure all the bo's are reserved and pin'd: */
static int submit_lock_objects(struct msm_gem_submit *submit)
{
@@ -313,10 +307,10 @@ static int submit_lock_objects(struct msm_gem_submit *submit)
}

for (; i >= 0; i--)
- submit_unlock_unpin_bo(submit, i);
+ submit_cleanup_bo(submit, i, BO_LOCKED);

if (slow_locked > 0)
- submit_unlock_unpin_bo(submit, slow_locked);
+ submit_cleanup_bo(submit, slow_locked, BO_LOCKED);

if (ret == -EDEADLK) {
struct drm_gem_object *obj = submit->bos[contended].obj;
--
2.41.0

2023-10-27 17:00:49

by Rob Clark

[permalink] [raw]
Subject: [PATCH 4/7] drm/msm/gem: Split out submit_unpin_objects() helper

From: Rob Clark <[email protected]>

Untangle unpinning from unlock/unref loop. The unpin only happens in
error paths so it is easier to decouple from the normal unlock path.

Since we never have an intermediate state where a subset of buffers
are pinned (ie. we never bail out of the pin or unpin loops) we can
replace the bo state flag bit with a global flag in the submit.

Signed-off-by: Rob Clark <[email protected]>
---
drivers/gpu/drm/msm/msm_gem.h | 6 +++---
drivers/gpu/drm/msm/msm_gem_submit.c | 22 +++++++++++++++++-----
drivers/gpu/drm/msm/msm_ringbuffer.c | 3 ++-
3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index c36c1c1fa222..af884ced7a0d 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -270,8 +270,9 @@ struct msm_gem_submit {
int fence_id; /* key into queue->fence_idr */
struct msm_gpu_submitqueue *queue;
struct pid *pid; /* submitting process */
- bool fault_dumped; /* Limit devcoredump dumping to one per submit */
- bool in_rb; /* "sudo" mode, copy cmds into RB */
+ bool bos_pinned : 1;
+ bool fault_dumped:1;/* Limit devcoredump dumping to one per submit */
+ bool in_rb : 1; /* "sudo" mode, copy cmds into RB */
struct msm_ringbuffer *ring;
unsigned int nr_cmds;
unsigned int nr_bos;
@@ -288,7 +289,6 @@ struct msm_gem_submit {
struct {
/* make sure these don't conflict w/ MSM_SUBMIT_BO_x */
#define BO_LOCKED 0x4000 /* obj lock is held */
-#define BO_PINNED 0x2000 /* obj (pages) is pinned and on active list */
uint32_t flags;
union {
struct drm_gem_object *obj;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 786b48a55309..d001bf286606 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -265,9 +265,6 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i,
*/
submit->bos[i].flags &= ~cleanup_flags;

- if (flags & BO_PINNED)
- msm_gem_unpin_locked(obj);
-
if (flags & BO_LOCKED)
dma_resv_unlock(obj->resv);
}
@@ -407,13 +404,28 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
mutex_lock(&priv->lru.lock);
for (i = 0; i < submit->nr_bos; i++) {
msm_gem_pin_obj_locked(submit->bos[i].obj);
- submit->bos[i].flags |= BO_PINNED;
}
mutex_unlock(&priv->lru.lock);

+ submit->bos_pinned = true;
+
return ret;
}

+static void submit_unpin_objects(struct msm_gem_submit *submit)
+{
+ if (!submit->bos_pinned)
+ return;
+
+ for (int i = 0; i < submit->nr_bos; i++) {
+ struct drm_gem_object *obj = submit->bos[i].obj;
+
+ msm_gem_unpin_locked(obj);
+ }
+
+ submit->bos_pinned = false;
+}
+
static void submit_attach_object_fences(struct msm_gem_submit *submit)
{
int i;
@@ -525,7 +537,7 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool error)
unsigned i;

if (error)
- cleanup_flags |= BO_PINNED;
+ submit_unpin_objects(submit);

for (i = 0; i < submit->nr_bos; i++) {
struct drm_gem_object *obj = submit->bos[i].obj;
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 9d6e2e10d25a..7ea5eca118eb 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -29,9 +29,10 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
struct drm_gem_object *obj = submit->bos[i].obj;

msm_gem_unpin_active(obj);
- submit->bos[i].flags &= ~BO_PINNED;
}

+ submit->bos_pinned = false;
+
mutex_unlock(&priv->lru.lock);

msm_gpu_submit(gpu, submit);
--
2.41.0

2023-10-27 17:00:56

by Rob Clark

[permalink] [raw]
Subject: [PATCH 7/7] drm/msm/gem: Convert to drm_exec

From: Rob Clark <[email protected]>

Replace the ww_mutex locking dance with the drm_exec helper.

Signed-off-by: Rob Clark <[email protected]>
---
drivers/gpu/drm/msm/Kconfig | 1 +
drivers/gpu/drm/msm/msm_gem.h | 5 +-
drivers/gpu/drm/msm/msm_gem_submit.c | 117 +++++----------------------
3 files changed, 24 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 6309a857ca31..f91d87afc0d3 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -16,6 +16,7 @@ config DRM_MSM
select DRM_DP_AUX_BUS
select DRM_DISPLAY_DP_HELPER
select DRM_DISPLAY_HELPER
+ select DRM_EXEC
select DRM_KMS_HELPER
select DRM_PANEL
select DRM_BRIDGE
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index af884ced7a0d..7f34263048a3 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -9,6 +9,7 @@

#include <linux/kref.h>
#include <linux/dma-resv.h>
+#include "drm/drm_exec.h"
#include "drm/gpu_scheduler.h"
#include "msm_drv.h"

@@ -254,7 +255,7 @@ struct msm_gem_submit {
struct msm_gpu *gpu;
struct msm_gem_address_space *aspace;
struct list_head node; /* node in ring submit list */
- struct ww_acquire_ctx ticket;
+ struct drm_exec exec;
uint32_t seqno; /* Sequence number of the submit on the ring */

/* Hw fence, which is created when the scheduler executes the job, and
@@ -287,8 +288,6 @@ struct msm_gem_submit {
struct drm_msm_gem_submit_reloc *relocs;
} *cmd; /* array of size nr_cmds */
struct {
-/* make sure these don't conflict w/ MSM_SUBMIT_BO_x */
-#define BO_LOCKED 0x4000 /* obj lock is held */
uint32_t flags;
union {
struct drm_gem_object *obj;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 603f04d851d9..f8d14d4ccfef 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -248,85 +248,31 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit,
return ret;
}

-static void submit_unlock_bo(struct msm_gem_submit *submit, int i)
-{
- struct drm_gem_object *obj = submit->bos[i].obj;
- unsigned cleanup_flags = BO_LOCKED;
- unsigned flags = submit->bos[i].flags & cleanup_flags;
-
- /*
- * Clear flags bit before dropping lock, so that the msm_job_run()
- * path isn't racing with submit_cleanup() (ie. the read/modify/
- * write is protected by the obj lock in all paths)
- */
- submit->bos[i].flags &= ~cleanup_flags;
-
- if (flags & BO_LOCKED)
- dma_resv_unlock(obj->resv);
-}
-
/* This is where we make sure all the bo's are reserved and pin'd: */
static int submit_lock_objects(struct msm_gem_submit *submit)
{
- int contended, slow_locked = -1, i, ret = 0;
-
-retry:
- for (i = 0; i < submit->nr_bos; i++) {
- struct drm_gem_object *obj = submit->bos[i].obj;
-
- if (slow_locked == i)
- slow_locked = -1;
+ int ret;

- contended = i;
+ drm_exec_init(&submit->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, submit->nr_bos);

- if (!(submit->bos[i].flags & BO_LOCKED)) {
- ret = dma_resv_lock_interruptible(obj->resv,
- &submit->ticket);
+ drm_exec_until_all_locked (&submit->exec) {
+ for (unsigned i = 0; i < submit->nr_bos; i++) {
+ struct drm_gem_object *obj = submit->bos[i].obj;
+ ret = drm_exec_prepare_obj(&submit->exec, obj, 1);
+ drm_exec_retry_on_contention(&submit->exec);
if (ret)
- goto fail;
- submit->bos[i].flags |= BO_LOCKED;
+ goto error;
}
}

- ww_acquire_done(&submit->ticket);
-
return 0;

-fail:
- if (ret == -EALREADY) {
- SUBMIT_ERROR(submit, "handle %u at index %u already on submit list\n",
- submit->bos[i].handle, i);
- ret = -EINVAL;
- }
-
- for (; i >= 0; i--)
- submit_unlock_bo(submit, i);
-
- if (slow_locked > 0)
- submit_unlock_bo(submit, slow_locked);
-
- if (ret == -EDEADLK) {
- struct drm_gem_object *obj = submit->bos[contended].obj;
- /* we lost out in a seqno race, lock and retry.. */
- ret = dma_resv_lock_slow_interruptible(obj->resv,
- &submit->ticket);
- if (!ret) {
- submit->bos[contended].flags |= BO_LOCKED;
- slow_locked = contended;
- goto retry;
- }
-
- /* Not expecting -EALREADY here, if the bo was already
- * locked, we should have gotten -EALREADY already from
- * the dma_resv_lock_interruptable() call.
- */
- WARN_ON_ONCE(ret == -EALREADY);
- }
-
+error:
+ drm_exec_fini(&submit->exec);
return ret;
}

-static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
+static int submit_fence_sync(struct msm_gem_submit *submit)
{
int i, ret = 0;

@@ -334,22 +280,6 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
struct drm_gem_object *obj = submit->bos[i].obj;
bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE;

- /* NOTE: _reserve_shared() must happen before
- * _add_shared_fence(), which makes this a slightly
- * strange place to call it. OTOH this is a
- * convenient can-fail point to hook it in.
- */
- ret = dma_resv_reserve_fences(obj->resv, 1);
- if (ret)
- return ret;
-
- /* If userspace has determined that explicit fencing is
- * used, it can disable implicit sync on the entire
- * submit:
- */
- if (no_implicit)
- continue;
-
/* Otherwise userspace can ask for implicit sync to be
* disabled on specific buffers. This is useful for internal
* usermode driver managed buffers, suballocation, etc.
@@ -531,15 +461,13 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool error)
{
unsigned i;

- if (error)
+ if (error) {
submit_unpin_objects(submit);
-
- for (i = 0; i < submit->nr_bos; i++) {
- struct drm_gem_object *obj = submit->bos[i].obj;
- submit_unlock_bo(submit, i);
- if (error)
- drm_gem_object_put(obj);
+ /* job wasn't enqueued to scheduler, so early retirement: */
+ msm_submit_retire(submit);
}
+
+ drm_exec_fini(&submit->exec);
}

void msm_submit_retire(struct msm_gem_submit *submit)
@@ -733,7 +661,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
struct msm_submit_post_dep *post_deps = NULL;
struct drm_syncobj **syncobjs_to_reset = NULL;
int out_fence_fd = -1;
- bool has_ww_ticket = false;
unsigned i;
int ret;

@@ -839,15 +766,15 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
goto out;

/* copy_*_user while holding a ww ticket upsets lockdep */
- ww_acquire_init(&submit->ticket, &reservation_ww_class);
- has_ww_ticket = true;
ret = submit_lock_objects(submit);
if (ret)
goto out;

- ret = submit_fence_sync(submit, !!(args->flags & MSM_SUBMIT_NO_IMPLICIT));
- if (ret)
- goto out;
+ if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) {
+ ret = submit_fence_sync(submit);
+ if (ret)
+ goto out;
+ }

ret = submit_pin_objects(submit);
if (ret)
@@ -978,8 +905,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,

out:
submit_cleanup(submit, !!ret);
- if (has_ww_ticket)
- ww_acquire_fini(&submit->ticket);
out_unlock:
mutex_unlock(&queue->lock);
out_post_unlock:
--
2.41.0

2023-10-27 17:01:01

by Rob Clark

[permalink] [raw]
Subject: [PATCH 6/7] drm/exec: Pass in initial # of objects

From: Rob Clark <[email protected]>

In cases where the # is known ahead of time, it is silly to do the table
resize dance.

Signed-off-by: Rob Clark <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 ++--
drivers/gpu/drm/drm_exec.c | 15 ++++++++++++---
drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +-
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +-
include/drm/drm_exec.h | 2 +-
8 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index efdb1c48f431..d27ca8f61929 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -65,7 +65,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
}

amdgpu_sync_create(&p->sync);
- drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
return 0;
}

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 720011019741..796fa6f1420b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -70,7 +70,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct drm_exec exec;
int r;

- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&exec) {
r = amdgpu_vm_lock_pd(vm, &exec, 0);
if (likely(!r))
@@ -110,7 +110,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct drm_exec exec;
int r;

- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&exec) {
r = amdgpu_vm_lock_pd(vm, &exec, 0);
if (likely(!r))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index ca4d2d430e28..16f1715148ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -203,7 +203,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_exec exec;
long r;

- drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES);
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1);
drm_exec_retry_on_contention(&exec);
@@ -739,7 +739,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
}

drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES);
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
if (gobj) {
r = drm_exec_lock_obj(&exec, gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index b6015157763a..3c351941701e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1105,7 +1105,7 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,

amdgpu_sync_create(&sync);

- drm_exec_init(&exec, 0);
+ drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
r = drm_exec_lock_obj(&exec,
&ctx_data->meta_data_obj->tbo.base);
@@ -1176,7 +1176,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
struct drm_exec exec;
long r;

- drm_exec_init(&exec, 0);
+ drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
r = drm_exec_lock_obj(&exec,
&ctx_data->meta_data_obj->tbo.base);
diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
index 5d2809de4517..27d11c20d148 100644
--- a/drivers/gpu/drm/drm_exec.c
+++ b/drivers/gpu/drm/drm_exec.c
@@ -69,16 +69,25 @@ static void drm_exec_unlock_all(struct drm_exec *exec)
* drm_exec_init - initialize a drm_exec object
* @exec: the drm_exec object to initialize
* @flags: controls locking behavior, see DRM_EXEC_* defines
+ * @nr: the initial # of objects
*
* Initialize the object and make sure that we can track locked objects.
+ *
+ * If nr is non-zero then it is used as the initial objects table size.
+ * In either case, the table will grow (be re-allocated) on demand.
*/
-void drm_exec_init(struct drm_exec *exec, uint32_t flags)
+void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr)
{
+ size_t sz = PAGE_SIZE;
+
+ if (nr)
+ sz = (size_t)nr * sizeof(void *);
+
exec->flags = flags;
- exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ exec->objects = kmalloc(sz, GFP_KERNEL);

/* If allocation here fails, just delay that till the first use */
- exec->max_objects = exec->objects ? PAGE_SIZE / sizeof(void *) : 0;
+ exec->max_objects = exec->objects ? sz / sizeof(void *) : 0;
exec->num_objects = 0;
exec->contended = DRM_EXEC_DUMMY;
exec->prelocked = NULL;
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
index 19024ce21fbb..f5930cc0b3fb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -103,7 +103,7 @@ nouveau_exec_job_submit(struct nouveau_job *job)

nouveau_uvmm_lock(uvmm);
drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES);
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(exec) {
struct drm_gpuva *va;

diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index aae780e4a4aa..3a9331a1c830 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1288,7 +1288,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
}

drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES);
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(exec) {
list_for_each_op(op, &bind_job->ops) {
struct drm_gpuva_op *va_op;
diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h
index b5bf0b6da791..f1a66c048721 100644
--- a/include/drm/drm_exec.h
+++ b/include/drm/drm_exec.h
@@ -135,7 +135,7 @@ static inline bool drm_exec_is_contended(struct drm_exec *exec)
return !!exec->contended;
}

-void drm_exec_init(struct drm_exec *exec, uint32_t flags);
+void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr);
void drm_exec_fini(struct drm_exec *exec);
bool drm_exec_cleanup(struct drm_exec *exec);
int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj);
--
2.41.0

2023-10-28 00:39:43

by Dmitry Baryshkov

[permalink] [raw]
Subject: Re: [PATCH 2/7] drm/msm/gem: Remove submit_unlock_unpin_bo()

On Fri, 27 Oct 2023 at 19:59, Rob Clark <[email protected]> wrote:
>
> From: Rob Clark <[email protected]>
>
> The only point it is called is before pinning objects, so the "unpin"
> part of the name is fiction. Just remove call submit_cleanup_bo()

Nit: 'remove it and call'

Other than that:

Reviewed-by: Dmitry Baryshkov <[email protected]>

> directly.
>
> Signed-off-by: Rob Clark <[email protected]>
> ---
> drivers/gpu/drm/msm/msm_gem_submit.c | 10 ++--------
> 1 file changed, 2 insertions(+), 8 deletions(-)


--
With best wishes
Dmitry

2023-10-28 01:14:28

by Dmitry Baryshkov

[permalink] [raw]
Subject: Re: [PATCH 3/7] drm/msm/gem: Don't queue job to sched in error cases

On Fri, 27 Oct 2023 at 19:59, Rob Clark <[email protected]> wrote:
>
> From: Rob Clark <[email protected]>
>
> We shouldn't be running the job in error cases. This also avoids having
> to think too hard about where the objs get unpinned (and if necessary,
> the resv takes over tracking that the obj is busy).. ie. error cases it
> always happens synchronously, and normal cases it happens from scheduler
> job_run() callback.
>
> Signed-off-by: Rob Clark <[email protected]>

Reviewed-by: Dmitry Baryshkov <[email protected]>

> ---
> drivers/gpu/drm/msm/msm_gem_submit.c | 3 +++
> 1 file changed, 3 insertions(+)

--
With best wishes
Dmitry

2023-10-30 08:05:50

by Christian König

[permalink] [raw]
Subject: Re: [PATCH 6/7] drm/exec: Pass in initial # of objects

Am 27.10.23 um 18:58 schrieb Rob Clark:
> From: Rob Clark <[email protected]>
>
> In cases where the # is known ahead of time, it is silly to do the table
> resize dance.

Ah, yes that was my initial implementation as well, but I ditched that
because nobody actually used it.

One comment below.

>
> Signed-off-by: Rob Clark <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 ++--
> drivers/gpu/drm/drm_exec.c | 15 ++++++++++++---
> drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +-
> drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +-
> include/drm/drm_exec.h | 2 +-
> 8 files changed, 22 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index efdb1c48f431..d27ca8f61929 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -65,7 +65,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
> }
>
> amdgpu_sync_create(&p->sync);
> - drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
> + drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> index 720011019741..796fa6f1420b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> @@ -70,7 +70,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> struct drm_exec exec;
> int r;
>
> - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
> + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> drm_exec_until_all_locked(&exec) {
> r = amdgpu_vm_lock_pd(vm, &exec, 0);
> if (likely(!r))
> @@ -110,7 +110,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> struct drm_exec exec;
> int r;
>
> - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
> + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> drm_exec_until_all_locked(&exec) {
> r = amdgpu_vm_lock_pd(vm, &exec, 0);
> if (likely(!r))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index ca4d2d430e28..16f1715148ad 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -203,7 +203,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
> struct drm_exec exec;
> long r;
>
> - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES);
> + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
> drm_exec_until_all_locked(&exec) {
> r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1);
> drm_exec_retry_on_contention(&exec);
> @@ -739,7 +739,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> }
>
> drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
> - DRM_EXEC_IGNORE_DUPLICATES);
> + DRM_EXEC_IGNORE_DUPLICATES, 0);
> drm_exec_until_all_locked(&exec) {
> if (gobj) {
> r = drm_exec_lock_obj(&exec, gobj);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index b6015157763a..3c351941701e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -1105,7 +1105,7 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
>
> amdgpu_sync_create(&sync);
>
> - drm_exec_init(&exec, 0);
> + drm_exec_init(&exec, 0, 0);
> drm_exec_until_all_locked(&exec) {
> r = drm_exec_lock_obj(&exec,
> &ctx_data->meta_data_obj->tbo.base);
> @@ -1176,7 +1176,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
> struct drm_exec exec;
> long r;
>
> - drm_exec_init(&exec, 0);
> + drm_exec_init(&exec, 0, 0);
> drm_exec_until_all_locked(&exec) {
> r = drm_exec_lock_obj(&exec,
> &ctx_data->meta_data_obj->tbo.base);
> diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
> index 5d2809de4517..27d11c20d148 100644
> --- a/drivers/gpu/drm/drm_exec.c
> +++ b/drivers/gpu/drm/drm_exec.c
> @@ -69,16 +69,25 @@ static void drm_exec_unlock_all(struct drm_exec *exec)
> * drm_exec_init - initialize a drm_exec object
> * @exec: the drm_exec object to initialize
> * @flags: controls locking behavior, see DRM_EXEC_* defines
> + * @nr: the initial # of objects
> *
> * Initialize the object and make sure that we can track locked objects.
> + *
> + * If nr is non-zero then it is used as the initial objects table size.
> + * In either case, the table will grow (be re-allocated) on demand.
> */
> -void drm_exec_init(struct drm_exec *exec, uint32_t flags)
> +void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr)
> {
> + size_t sz = PAGE_SIZE;
> +
> + if (nr)
> + sz = (size_t)nr * sizeof(void *);
> +
> exec->flags = flags;
> - exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL);
> + exec->objects = kmalloc(sz, GFP_KERNEL);

Please use k*v*malloc() here since we can't predict how large that will be.

With that fixed the patch is Reviewed-by: Christian König
<[email protected]>.

Regards,
Christian.

>
> /* If allocation here fails, just delay that till the first use */
> - exec->max_objects = exec->objects ? PAGE_SIZE / sizeof(void *) : 0;
> + exec->max_objects = exec->objects ? sz / sizeof(void *) : 0;
> exec->num_objects = 0;
> exec->contended = DRM_EXEC_DUMMY;
> exec->prelocked = NULL;
> diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
> index 19024ce21fbb..f5930cc0b3fb 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_exec.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
> @@ -103,7 +103,7 @@ nouveau_exec_job_submit(struct nouveau_job *job)
>
> nouveau_uvmm_lock(uvmm);
> drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
> - DRM_EXEC_IGNORE_DUPLICATES);
> + DRM_EXEC_IGNORE_DUPLICATES, 0);
> drm_exec_until_all_locked(exec) {
> struct drm_gpuva *va;
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
> index aae780e4a4aa..3a9331a1c830 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
> @@ -1288,7 +1288,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
> }
>
> drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
> - DRM_EXEC_IGNORE_DUPLICATES);
> + DRM_EXEC_IGNORE_DUPLICATES, 0);
> drm_exec_until_all_locked(exec) {
> list_for_each_op(op, &bind_job->ops) {
> struct drm_gpuva_op *va_op;
> diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h
> index b5bf0b6da791..f1a66c048721 100644
> --- a/include/drm/drm_exec.h
> +++ b/include/drm/drm_exec.h
> @@ -135,7 +135,7 @@ static inline bool drm_exec_is_contended(struct drm_exec *exec)
> return !!exec->contended;
> }
>
> -void drm_exec_init(struct drm_exec *exec, uint32_t flags);
> +void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr);
> void drm_exec_fini(struct drm_exec *exec);
> bool drm_exec_cleanup(struct drm_exec *exec);
> int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj);

2023-10-30 13:39:00

by Rob Clark

[permalink] [raw]
Subject: Re: [PATCH 6/7] drm/exec: Pass in initial # of objects

On Mon, Oct 30, 2023 at 1:05 AM Christian König
<[email protected]> wrote:
>
> Am 27.10.23 um 18:58 schrieb Rob Clark:
> > From: Rob Clark <[email protected]>
> >
> > In cases where the # is known ahead of time, it is silly to do the table
> > resize dance.
>
> Ah, yes that was my initial implementation as well, but I ditched that
> because nobody actually used it.
>
> One comment below.
>
> >
> > Signed-off-by: Rob Clark <[email protected]>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
> > drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 ++--
> > drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ++--
> > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 ++--
> > drivers/gpu/drm/drm_exec.c | 15 ++++++++++++---
> > drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +-
> > drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +-
> > include/drm/drm_exec.h | 2 +-
> > 8 files changed, 22 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index efdb1c48f431..d27ca8f61929 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -65,7 +65,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
> > }
> >
> > amdgpu_sync_create(&p->sync);
> > - drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
> > + drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> > return 0;
> > }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> > index 720011019741..796fa6f1420b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> > @@ -70,7 +70,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> > struct drm_exec exec;
> > int r;
> >
> > - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
> > + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> > drm_exec_until_all_locked(&exec) {
> > r = amdgpu_vm_lock_pd(vm, &exec, 0);
> > if (likely(!r))
> > @@ -110,7 +110,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> > struct drm_exec exec;
> > int r;
> >
> > - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
> > + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> > drm_exec_until_all_locked(&exec) {
> > r = amdgpu_vm_lock_pd(vm, &exec, 0);
> > if (likely(!r))
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > index ca4d2d430e28..16f1715148ad 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > @@ -203,7 +203,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
> > struct drm_exec exec;
> > long r;
> >
> > - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES);
> > + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
> > drm_exec_until_all_locked(&exec) {
> > r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1);
> > drm_exec_retry_on_contention(&exec);
> > @@ -739,7 +739,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> > }
> >
> > drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
> > - DRM_EXEC_IGNORE_DUPLICATES);
> > + DRM_EXEC_IGNORE_DUPLICATES, 0);
> > drm_exec_until_all_locked(&exec) {
> > if (gobj) {
> > r = drm_exec_lock_obj(&exec, gobj);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > index b6015157763a..3c351941701e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > @@ -1105,7 +1105,7 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
> >
> > amdgpu_sync_create(&sync);
> >
> > - drm_exec_init(&exec, 0);
> > + drm_exec_init(&exec, 0, 0);
> > drm_exec_until_all_locked(&exec) {
> > r = drm_exec_lock_obj(&exec,
> > &ctx_data->meta_data_obj->tbo.base);
> > @@ -1176,7 +1176,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
> > struct drm_exec exec;
> > long r;
> >
> > - drm_exec_init(&exec, 0);
> > + drm_exec_init(&exec, 0, 0);
> > drm_exec_until_all_locked(&exec) {
> > r = drm_exec_lock_obj(&exec,
> > &ctx_data->meta_data_obj->tbo.base);
> > diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
> > index 5d2809de4517..27d11c20d148 100644
> > --- a/drivers/gpu/drm/drm_exec.c
> > +++ b/drivers/gpu/drm/drm_exec.c
> > @@ -69,16 +69,25 @@ static void drm_exec_unlock_all(struct drm_exec *exec)
> > * drm_exec_init - initialize a drm_exec object
> > * @exec: the drm_exec object to initialize
> > * @flags: controls locking behavior, see DRM_EXEC_* defines
> > + * @nr: the initial # of objects
> > *
> > * Initialize the object and make sure that we can track locked objects.
> > + *
> > + * If nr is non-zero then it is used as the initial objects table size.
> > + * In either case, the table will grow (be re-allocated) on demand.
> > */
> > -void drm_exec_init(struct drm_exec *exec, uint32_t flags)
> > +void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr)
> > {
> > + size_t sz = PAGE_SIZE;
> > +
> > + if (nr)
> > + sz = (size_t)nr * sizeof(void *);
> > +
> > exec->flags = flags;
> > - exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL);
> > + exec->objects = kmalloc(sz, GFP_KERNEL);
>
> Please use k*v*malloc() here since we can't predict how large that will be.

or __GFP_NOWARN? If userspace (or kasan) is cheeky and asks for ~0
objects, we should probably just fail?

BR,
-R

> With that fixed the patch is Reviewed-by: Christian König
> <[email protected]>.
>
> Regards,
> Christian.
>
> >
> > /* If allocation here fails, just delay that till the first use */
> > - exec->max_objects = exec->objects ? PAGE_SIZE / sizeof(void *) : 0;
> > + exec->max_objects = exec->objects ? sz / sizeof(void *) : 0;
> > exec->num_objects = 0;
> > exec->contended = DRM_EXEC_DUMMY;
> > exec->prelocked = NULL;
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
> > index 19024ce21fbb..f5930cc0b3fb 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_exec.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
> > @@ -103,7 +103,7 @@ nouveau_exec_job_submit(struct nouveau_job *job)
> >
> > nouveau_uvmm_lock(uvmm);
> > drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
> > - DRM_EXEC_IGNORE_DUPLICATES);
> > + DRM_EXEC_IGNORE_DUPLICATES, 0);
> > drm_exec_until_all_locked(exec) {
> > struct drm_gpuva *va;
> >
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
> > index aae780e4a4aa..3a9331a1c830 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
> > @@ -1288,7 +1288,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
> > }
> >
> > drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
> > - DRM_EXEC_IGNORE_DUPLICATES);
> > + DRM_EXEC_IGNORE_DUPLICATES, 0);
> > drm_exec_until_all_locked(exec) {
> > list_for_each_op(op, &bind_job->ops) {
> > struct drm_gpuva_op *va_op;
> > diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h
> > index b5bf0b6da791..f1a66c048721 100644
> > --- a/include/drm/drm_exec.h
> > +++ b/include/drm/drm_exec.h
> > @@ -135,7 +135,7 @@ static inline bool drm_exec_is_contended(struct drm_exec *exec)
> > return !!exec->contended;
> > }
> >
> > -void drm_exec_init(struct drm_exec *exec, uint32_t flags);
> > +void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr);
> > void drm_exec_fini(struct drm_exec *exec);
> > bool drm_exec_cleanup(struct drm_exec *exec);
> > int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj);
>

2023-10-30 16:01:45

by Christian König

[permalink] [raw]
Subject: Re: [PATCH 6/7] drm/exec: Pass in initial # of objects

Am 30.10.23 um 14:38 schrieb Rob Clark:
> On Mon, Oct 30, 2023 at 1:05 AM Christian König
> <[email protected]> wrote:
>> Am 27.10.23 um 18:58 schrieb Rob Clark:
>>> From: Rob Clark <[email protected]>
>>>
>>> In cases where the # is known ahead of time, it is silly to do the table
>>> resize dance.
>> Ah, yes that was my initial implementation as well, but I ditched that
>> because nobody actually used it.
>>
>> One comment below.
>>
>>> Signed-off-by: Rob Clark <[email protected]>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 ++--
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ++--
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 ++--
>>> drivers/gpu/drm/drm_exec.c | 15 ++++++++++++---
>>> drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +-
>>> drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +-
>>> include/drm/drm_exec.h | 2 +-
>>> 8 files changed, 22 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index efdb1c48f431..d27ca8f61929 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -65,7 +65,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
>>> }
>>>
>>> amdgpu_sync_create(&p->sync);
>>> - drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
>>> + drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
>>> return 0;
>>> }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> index 720011019741..796fa6f1420b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> @@ -70,7 +70,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>> struct drm_exec exec;
>>> int r;
>>>
>>> - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
>>> + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
>>> drm_exec_until_all_locked(&exec) {
>>> r = amdgpu_vm_lock_pd(vm, &exec, 0);
>>> if (likely(!r))
>>> @@ -110,7 +110,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>> struct drm_exec exec;
>>> int r;
>>>
>>> - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
>>> + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
>>> drm_exec_until_all_locked(&exec) {
>>> r = amdgpu_vm_lock_pd(vm, &exec, 0);
>>> if (likely(!r))
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index ca4d2d430e28..16f1715148ad 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -203,7 +203,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
>>> struct drm_exec exec;
>>> long r;
>>>
>>> - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES);
>>> + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
>>> drm_exec_until_all_locked(&exec) {
>>> r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1);
>>> drm_exec_retry_on_contention(&exec);
>>> @@ -739,7 +739,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>> }
>>>
>>> drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
>>> - DRM_EXEC_IGNORE_DUPLICATES);
>>> + DRM_EXEC_IGNORE_DUPLICATES, 0);
>>> drm_exec_until_all_locked(&exec) {
>>> if (gobj) {
>>> r = drm_exec_lock_obj(&exec, gobj);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
>>> index b6015157763a..3c351941701e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
>>> @@ -1105,7 +1105,7 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
>>>
>>> amdgpu_sync_create(&sync);
>>>
>>> - drm_exec_init(&exec, 0);
>>> + drm_exec_init(&exec, 0, 0);
>>> drm_exec_until_all_locked(&exec) {
>>> r = drm_exec_lock_obj(&exec,
>>> &ctx_data->meta_data_obj->tbo.base);
>>> @@ -1176,7 +1176,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
>>> struct drm_exec exec;
>>> long r;
>>>
>>> - drm_exec_init(&exec, 0);
>>> + drm_exec_init(&exec, 0, 0);
>>> drm_exec_until_all_locked(&exec) {
>>> r = drm_exec_lock_obj(&exec,
>>> &ctx_data->meta_data_obj->tbo.base);
>>> diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
>>> index 5d2809de4517..27d11c20d148 100644
>>> --- a/drivers/gpu/drm/drm_exec.c
>>> +++ b/drivers/gpu/drm/drm_exec.c
>>> @@ -69,16 +69,25 @@ static void drm_exec_unlock_all(struct drm_exec *exec)
>>> * drm_exec_init - initialize a drm_exec object
>>> * @exec: the drm_exec object to initialize
>>> * @flags: controls locking behavior, see DRM_EXEC_* defines
>>> + * @nr: the initial # of objects
>>> *
>>> * Initialize the object and make sure that we can track locked objects.
>>> + *
>>> + * If nr is non-zero then it is used as the initial objects table size.
>>> + * In either case, the table will grow (be re-allocated) on demand.
>>> */
>>> -void drm_exec_init(struct drm_exec *exec, uint32_t flags)
>>> +void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr)
>>> {
>>> + size_t sz = PAGE_SIZE;
>>> +
>>> + if (nr)
>>> + sz = (size_t)nr * sizeof(void *);
>>> +
>>> exec->flags = flags;
>>> - exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL);
>>> + exec->objects = kmalloc(sz, GFP_KERNEL);
>> Please use k*v*malloc() here since we can't predict how large that will be.
> or __GFP_NOWARN? If userspace (or kasan) is cheeky and asks for ~0
> objects, we should probably just fail?

Oh, good point! If this value is controlled by userspace we must be much
more careful.

Instead of __GFP_NOWARN or any other workaround we should use
kvmalloc_array() here.

Maybe turn the code upside down, in other words something like this here:

if (!nr)
    nr = PAGE_SIZE / sizeof(void *);

exec->objects = kvmalloc_array(nr, sizeof(void *), GFP_KERNEL);
exec->max_objects = exec->objects ? nr : 0;


Regards,
Christian.

>
> BR,
> -R
>
>> With that fixed the patch is Reviewed-by: Christian König
>> <[email protected]>.
>>
>> Regards,
>> Christian.
>>
>>> /* If allocation here fails, just delay that till the first use */
>>> - exec->max_objects = exec->objects ? PAGE_SIZE / sizeof(void *) : 0;
>>> + exec->max_objects = exec->objects ? sz / sizeof(void *) : 0;
>>> exec->num_objects = 0;
>>> exec->contended = DRM_EXEC_DUMMY;
>>> exec->prelocked = NULL;
>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
>>> index 19024ce21fbb..f5930cc0b3fb 100644
>>> --- a/drivers/gpu/drm/nouveau/nouveau_exec.c
>>> +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
>>> @@ -103,7 +103,7 @@ nouveau_exec_job_submit(struct nouveau_job *job)
>>>
>>> nouveau_uvmm_lock(uvmm);
>>> drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
>>> - DRM_EXEC_IGNORE_DUPLICATES);
>>> + DRM_EXEC_IGNORE_DUPLICATES, 0);
>>> drm_exec_until_all_locked(exec) {
>>> struct drm_gpuva *va;
>>>
>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
>>> index aae780e4a4aa..3a9331a1c830 100644
>>> --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
>>> +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
>>> @@ -1288,7 +1288,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
>>> }
>>>
>>> drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
>>> - DRM_EXEC_IGNORE_DUPLICATES);
>>> + DRM_EXEC_IGNORE_DUPLICATES, 0);
>>> drm_exec_until_all_locked(exec) {
>>> list_for_each_op(op, &bind_job->ops) {
>>> struct drm_gpuva_op *va_op;
>>> diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h
>>> index b5bf0b6da791..f1a66c048721 100644
>>> --- a/include/drm/drm_exec.h
>>> +++ b/include/drm/drm_exec.h
>>> @@ -135,7 +135,7 @@ static inline bool drm_exec_is_contended(struct drm_exec *exec)
>>> return !!exec->contended;
>>> }
>>>
>>> -void drm_exec_init(struct drm_exec *exec, uint32_t flags);
>>> +void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr);
>>> void drm_exec_fini(struct drm_exec *exec);
>>> bool drm_exec_cleanup(struct drm_exec *exec);
>>> int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj);

2023-10-30 18:27:33

by Rob Clark

[permalink] [raw]
Subject: Re: [PATCH 6/7] drm/exec: Pass in initial # of objects

On Mon, Oct 30, 2023 at 9:01 AM Christian König
<[email protected]> wrote:
>
> Am 30.10.23 um 14:38 schrieb Rob Clark:
> > On Mon, Oct 30, 2023 at 1:05 AM Christian König
> > <[email protected]> wrote:
> >> Am 27.10.23 um 18:58 schrieb Rob Clark:
> >>> From: Rob Clark <[email protected]>
> >>>
> >>> In cases where the # is known ahead of time, it is silly to do the table
> >>> resize dance.
> >> Ah, yes that was my initial implementation as well, but I ditched that
> >> because nobody actually used it.
> >>
> >> One comment below.
> >>
> >>> Signed-off-by: Rob Clark <[email protected]>
> >>> ---
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 ++--
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ++--
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 ++--
> >>> drivers/gpu/drm/drm_exec.c | 15 ++++++++++++---
> >>> drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +-
> >>> drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +-
> >>> include/drm/drm_exec.h | 2 +-
> >>> 8 files changed, 22 insertions(+), 13 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >>> index efdb1c48f431..d27ca8f61929 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >>> @@ -65,7 +65,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
> >>> }
> >>>
> >>> amdgpu_sync_create(&p->sync);
> >>> - drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
> >>> + drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> >>> return 0;
> >>> }
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>> index 720011019741..796fa6f1420b 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>> @@ -70,7 +70,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> >>> struct drm_exec exec;
> >>> int r;
> >>>
> >>> - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
> >>> + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> >>> drm_exec_until_all_locked(&exec) {
> >>> r = amdgpu_vm_lock_pd(vm, &exec, 0);
> >>> if (likely(!r))
> >>> @@ -110,7 +110,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> >>> struct drm_exec exec;
> >>> int r;
> >>>
> >>> - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
> >>> + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> >>> drm_exec_until_all_locked(&exec) {
> >>> r = amdgpu_vm_lock_pd(vm, &exec, 0);
> >>> if (likely(!r))
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>> index ca4d2d430e28..16f1715148ad 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>> @@ -203,7 +203,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
> >>> struct drm_exec exec;
> >>> long r;
> >>>
> >>> - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES);
> >>> + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
> >>> drm_exec_until_all_locked(&exec) {
> >>> r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1);
> >>> drm_exec_retry_on_contention(&exec);
> >>> @@ -739,7 +739,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> >>> }
> >>>
> >>> drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
> >>> - DRM_EXEC_IGNORE_DUPLICATES);
> >>> + DRM_EXEC_IGNORE_DUPLICATES, 0);
> >>> drm_exec_until_all_locked(&exec) {
> >>> if (gobj) {
> >>> r = drm_exec_lock_obj(&exec, gobj);
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> >>> index b6015157763a..3c351941701e 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> >>> @@ -1105,7 +1105,7 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
> >>>
> >>> amdgpu_sync_create(&sync);
> >>>
> >>> - drm_exec_init(&exec, 0);
> >>> + drm_exec_init(&exec, 0, 0);
> >>> drm_exec_until_all_locked(&exec) {
> >>> r = drm_exec_lock_obj(&exec,
> >>> &ctx_data->meta_data_obj->tbo.base);
> >>> @@ -1176,7 +1176,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
> >>> struct drm_exec exec;
> >>> long r;
> >>>
> >>> - drm_exec_init(&exec, 0);
> >>> + drm_exec_init(&exec, 0, 0);
> >>> drm_exec_until_all_locked(&exec) {
> >>> r = drm_exec_lock_obj(&exec,
> >>> &ctx_data->meta_data_obj->tbo.base);
> >>> diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
> >>> index 5d2809de4517..27d11c20d148 100644
> >>> --- a/drivers/gpu/drm/drm_exec.c
> >>> +++ b/drivers/gpu/drm/drm_exec.c
> >>> @@ -69,16 +69,25 @@ static void drm_exec_unlock_all(struct drm_exec *exec)
> >>> * drm_exec_init - initialize a drm_exec object
> >>> * @exec: the drm_exec object to initialize
> >>> * @flags: controls locking behavior, see DRM_EXEC_* defines
> >>> + * @nr: the initial # of objects
> >>> *
> >>> * Initialize the object and make sure that we can track locked objects.
> >>> + *
> >>> + * If nr is non-zero then it is used as the initial objects table size.
> >>> + * In either case, the table will grow (be re-allocated) on demand.
> >>> */
> >>> -void drm_exec_init(struct drm_exec *exec, uint32_t flags)
> >>> +void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr)
> >>> {
> >>> + size_t sz = PAGE_SIZE;
> >>> +
> >>> + if (nr)
> >>> + sz = (size_t)nr * sizeof(void *);
> >>> +
> >>> exec->flags = flags;
> >>> - exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL);
> >>> + exec->objects = kmalloc(sz, GFP_KERNEL);
> >> Please use k*v*malloc() here since we can't predict how large that will be.
> > or __GFP_NOWARN? If userspace (or kasan) is cheeky and asks for ~0
> > objects, we should probably just fail?
>
> Oh, good point! If this value is controlled by userspace we must be much
> more careful.
>
> Instead of __GFP_NOWARN or any other workaround we should use
> kvmalloc_array() here.
>
> Maybe turn the code upside down, in other words something like this here:
>
> if (!nr)
> nr = PAGE_SIZE / sizeof(void *);
>
> exec->objects = kvmalloc_array(nr, sizeof(void *), GFP_KERNEL);
> exec->max_objects = exec->objects ? nr : 0;

oh, good point

BR,
-R

>
> Regards,
> Christian.
>
> >
> > BR,
> > -R
> >
> >> With that fixed the patch is Reviewed-by: Christian König
> >> <[email protected]>.
> >>
> >> Regards,
> >> Christian.
> >>
> >>> /* If allocation here fails, just delay that till the first use */
> >>> - exec->max_objects = exec->objects ? PAGE_SIZE / sizeof(void *) : 0;
> >>> + exec->max_objects = exec->objects ? sz / sizeof(void *) : 0;
> >>> exec->num_objects = 0;
> >>> exec->contended = DRM_EXEC_DUMMY;
> >>> exec->prelocked = NULL;
> >>> diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
> >>> index 19024ce21fbb..f5930cc0b3fb 100644
> >>> --- a/drivers/gpu/drm/nouveau/nouveau_exec.c
> >>> +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
> >>> @@ -103,7 +103,7 @@ nouveau_exec_job_submit(struct nouveau_job *job)
> >>>
> >>> nouveau_uvmm_lock(uvmm);
> >>> drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
> >>> - DRM_EXEC_IGNORE_DUPLICATES);
> >>> + DRM_EXEC_IGNORE_DUPLICATES, 0);
> >>> drm_exec_until_all_locked(exec) {
> >>> struct drm_gpuva *va;
> >>>
> >>> diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
> >>> index aae780e4a4aa..3a9331a1c830 100644
> >>> --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
> >>> +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
> >>> @@ -1288,7 +1288,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
> >>> }
> >>>
> >>> drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
> >>> - DRM_EXEC_IGNORE_DUPLICATES);
> >>> + DRM_EXEC_IGNORE_DUPLICATES, 0);
> >>> drm_exec_until_all_locked(exec) {
> >>> list_for_each_op(op, &bind_job->ops) {
> >>> struct drm_gpuva_op *va_op;
> >>> diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h
> >>> index b5bf0b6da791..f1a66c048721 100644
> >>> --- a/include/drm/drm_exec.h
> >>> +++ b/include/drm/drm_exec.h
> >>> @@ -135,7 +135,7 @@ static inline bool drm_exec_is_contended(struct drm_exec *exec)
> >>> return !!exec->contended;
> >>> }
> >>>
> >>> -void drm_exec_init(struct drm_exec *exec, uint32_t flags);
> >>> +void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr);
> >>> void drm_exec_fini(struct drm_exec *exec);
> >>> bool drm_exec_cleanup(struct drm_exec *exec);
> >>> int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj);
>