2021-11-11 19:19:54

by Rob Clark

Subject: [PATCH 0/2] drm/msm: wait_fence fixes

From: Rob Clark <[email protected]>

A couple of wait_fence related fixes.

Rob Clark (2):
drm/msm: Fix wait_fence submitqueue leak
drm/msm: Restore error return on invalid fence

drivers/gpu/drm/msm/msm_drv.c | 49 ++++++++++++++++++----------
drivers/gpu/drm/msm/msm_gem_submit.c | 1 +
drivers/gpu/drm/msm/msm_gpu.h | 3 ++
3 files changed, 36 insertions(+), 17 deletions(-)

--
2.31.1



2021-11-11 19:19:56

by Rob Clark

Subject: [PATCH 1/2] drm/msm: Fix wait_fence submitqueue leak

From: Rob Clark <[email protected]>

We weren't dropping the submitqueue reference in all paths, in
particular when the fence has already been signalled. Split out a
helper to simplify handling the various return paths.

Fixes: a61acbbe9cf8 ("drm/msm: Track "seqno" fences by idr")
Signed-off-by: Rob Clark <[email protected]>
---
drivers/gpu/drm/msm/msm_drv.c | 49 +++++++++++++++++++++--------------
1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 73e827641024..cb14d997c174 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -961,29 +961,12 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
return ret;
}

-static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
- struct drm_file *file)
+static int wait_fence(struct msm_gpu_submitqueue *queue, uint32_t fence_id,
+ ktime_t timeout)
{
- struct msm_drm_private *priv = dev->dev_private;
- struct drm_msm_wait_fence *args = data;
- ktime_t timeout = to_ktime(args->timeout);
- struct msm_gpu_submitqueue *queue;
- struct msm_gpu *gpu = priv->gpu;
struct dma_fence *fence;
int ret;

- if (args->pad) {
- DRM_ERROR("invalid pad: %08x\n", args->pad);
- return -EINVAL;
- }
-
- if (!gpu)
- return 0;
-
- queue = msm_submitqueue_get(file->driver_priv, args->queueid);
- if (!queue)
- return -ENOENT;
-
/*
* Map submitqueue scoped "seqno" (which is actually an idr key)
* back to underlying dma-fence
@@ -995,7 +978,7 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
ret = mutex_lock_interruptible(&queue->lock);
if (ret)
return ret;
- fence = idr_find(&queue->fence_idr, args->fence);
+ fence = idr_find(&queue->fence_idr, fence_id);
if (fence)
fence = dma_fence_get_rcu(fence);
mutex_unlock(&queue->lock);
@@ -1011,6 +994,32 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
}

dma_fence_put(fence);
+
+ return ret;
+}
+
+static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct msm_drm_private *priv = dev->dev_private;
+ struct drm_msm_wait_fence *args = data;
+ struct msm_gpu_submitqueue *queue;
+ int ret;
+
+ if (args->pad) {
+ DRM_ERROR("invalid pad: %08x\n", args->pad);
+ return -EINVAL;
+ }
+
+ if (!priv->gpu)
+ return 0;
+
+ queue = msm_submitqueue_get(file->driver_priv, args->queueid);
+ if (!queue)
+ return -ENOENT;
+
+ ret = wait_fence(queue, args->fence, to_ktime(args->timeout));
+
msm_submitqueue_put(queue);

return ret;
--
2.31.1
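
A standalone sketch of the pattern [PATCH 1/2] applies (toy types and
names, not the driver code): take the reference once in the caller, move
every early return into a helper, and drop the reference at a single
point that no path can skip. Plain userspace C:

#include <errno.h>
#include <stdio.h>

struct queue {
	int refcount;			/* stands in for the submitqueue kref */
};

static void queue_get(struct queue *q)
{
	q->refcount++;
}

static void queue_put(struct queue *q)
{
	q->refcount--;			/* in the kernel, the last put frees the queue */
}

/* Every early return lives here, so the caller has exactly one exit path. */
static int do_wait(struct queue *q, unsigned int fence_id)
{
	if (fence_id == 0)
		return -EINVAL;		/* e.g. fence already signalled / not found */
	/* ... look up the fence and wait on it ... */
	return 0;
}

static int wait_ioctl(struct queue *q, unsigned int fence_id)
{
	int ret;

	queue_get(q);
	ret = do_wait(q, fence_id);	/* helper owns all the return paths */
	queue_put(q);			/* single release point, never skipped */
	return ret;
}

int main(void)
{
	struct queue q = { .refcount = 1 };

	/* Error path and success path both leave the count balanced. */
	printf("ret=%d refcount=%d\n", wait_ioctl(&q, 0), q.refcount);
	printf("ret=%d refcount=%d\n", wait_ioctl(&q, 7), q.refcount);
	return 0;
}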


2021-11-11 19:20:00

by Rob Clark

Subject: [PATCH 2/2] drm/msm: Restore error return on invalid fence

From: Rob Clark <[email protected]>

When converting to use an idr to map userspace fence seqno values back
to a dma_fence, we lost the error return when userspace passes a seqno
that is larger than the last submitted fence. Restore this check.

Reported-by: Akhil P Oommen <[email protected]>
Fixes: a61acbbe9cf8 ("drm/msm: Track "seqno" fences by idr")
Signed-off-by: Rob Clark <[email protected]>
---
Note: I will rebase "drm/msm: Handle fence rollover" on top of this,
to simplify backporting this patch to stable kernels

drivers/gpu/drm/msm/msm_drv.c | 6 ++++++
drivers/gpu/drm/msm/msm_gem_submit.c | 1 +
drivers/gpu/drm/msm/msm_gpu.h | 3 +++
3 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index cb14d997c174..56500eb5219e 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -967,6 +967,12 @@ static int wait_fence(struct msm_gpu_submitqueue *queue, uint32_t fence_id,
struct dma_fence *fence;
int ret;

+ if (fence_id > queue->last_fence) {
+ DRM_ERROR_RATELIMITED("waiting on invalid fence: %u (of %u)\n",
+ fence_id, queue->last_fence);
+ return -EINVAL;
+ }
+
/*
* Map submitqueue scoped "seqno" (which is actually an idr key)
* back to underlying dma-fence
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 151d19e4453c..a38f23be497d 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -911,6 +911,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
drm_sched_entity_push_job(&submit->base, queue->entity);

args->fence = submit->fence_id;
+ queue->last_fence = submit->fence_id;

msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs);
msm_process_post_deps(post_deps, args->nr_out_syncobjs,
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index bd4e0024033e..e73a5bb03544 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -376,6 +376,8 @@ static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
* @ring_nr: the ringbuffer used by this submitqueue, which is determined
* by the submitqueue's priority
* @faults: the number of GPU hangs associated with this submitqueue
+ * @last_fence: the sequence number of the last allocated fence (for error
+ * checking)
* @ctx: the per-drm_file context associated with the submitqueue (ie.
* which set of pgtables do submits jobs associated with the
* submitqueue use)
@@ -391,6 +393,7 @@ struct msm_gpu_submitqueue {
u32 flags;
u32 ring_nr;
int faults;
+ uint32_t last_fence;
struct msm_file_private *ctx;
struct list_head node;
struct idr fence_idr;
--
2.31.1
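
A minimal userspace sketch of the behaviour [PATCH 2/2] restores (the
render node path, the queueid value and the oversized fence id are
illustrative assumptions, not taken from the patch): waiting on a seqno
larger than anything submitted on the queue now fails with EINVAL
instead of the error being silently dropped. Built against libdrm:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <xf86drm.h>		/* drmIoctl(), from libdrm */
#include <msm_drm.h>		/* msm uapi header shipped with libdrm */

int main(void)
{
	/* Assumed node path; use whichever render node is the msm GPU. */
	int fd = open("/dev/dri/renderD128", O_RDWR);
	struct drm_msm_wait_fence req = {
		.fence   = 0xf0000000,			/* far beyond anything submitted */
		.queueid = 0,				/* assumed: the default submitqueue */
		.timeout = { .tv_sec = 1, .tv_nsec = 0 },
	};

	if (fd < 0)
		return 1;

	if (drmIoctl(fd, DRM_IOCTL_MSM_WAIT_FENCE, &req))
		printf("wait_fence: %s\n", strerror(errno));	/* EINVAL with this patch */
	else
		printf("wait_fence: success\n");

	return 0;
}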


2021-11-15 14:44:32

by Akhil P Oommen

Subject: Re: [PATCH 2/2] drm/msm: Restore error return on invalid fence

On 11/12/2021 12:54 AM, Rob Clark wrote:
> From: Rob Clark <[email protected]>
>
> When converting to use an idr to map userspace fence seqno values back
> to a dma_fence, we lost the error return when userspace passes a seqno
> that is larger than the last submitted fence. Restore this check.
>
> Reported-by: Akhil P Oommen <[email protected]>
> Fixes: a61acbbe9cf8 ("drm/msm: Track "seqno" fences by idr")
> Signed-off-by: Rob Clark <[email protected]>
> ---
> Note: I will rebase "drm/msm: Handle fence rollover" on top of this,
> to simplify backporting this patch to stable kernels
>
> drivers/gpu/drm/msm/msm_drv.c | 6 ++++++
> drivers/gpu/drm/msm/msm_gem_submit.c | 1 +
> drivers/gpu/drm/msm/msm_gpu.h | 3 +++
> 3 files changed, 10 insertions(+)
>
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index cb14d997c174..56500eb5219e 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -967,6 +967,12 @@ static int wait_fence(struct msm_gpu_submitqueue *queue, uint32_t fence_id,
> struct dma_fence *fence;
> int ret;
>
> + if (fence_id > queue->last_fence) {

But fence_id can wrap around and then this check won't be valid.

-Akhil.

> + DRM_ERROR_RATELIMITED("waiting on invalid fence: %u (of %u)\n",
> + fence_id, queue->last_fence);
> + return -EINVAL;
> + }
> +
> /*
> * Map submitqueue scoped "seqno" (which is actually an idr key)
> * back to underlying dma-fence
> diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> index 151d19e4453c..a38f23be497d 100644
> --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> @@ -911,6 +911,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> drm_sched_entity_push_job(&submit->base, queue->entity);
>
> args->fence = submit->fence_id;
> + queue->last_fence = submit->fence_id;
>
> msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs);
> msm_process_post_deps(post_deps, args->nr_out_syncobjs,
> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index bd4e0024033e..e73a5bb03544 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -376,6 +376,8 @@ static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
> * @ring_nr: the ringbuffer used by this submitqueue, which is determined
> * by the submitqueue's priority
> * @faults: the number of GPU hangs associated with this submitqueue
> + * @last_fence: the sequence number of the last allocated fence (for error
> + * checking)
> * @ctx: the per-drm_file context associated with the submitqueue (ie.
> * which set of pgtables do submits jobs associated with the
> * submitqueue use)
> @@ -391,6 +393,7 @@ struct msm_gpu_submitqueue {
> u32 flags;
> u32 ring_nr;
> int faults;
> + uint32_t last_fence;
> struct msm_file_private *ctx;
> struct list_head node;
> struct idr fence_idr;
>
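
A tiny sketch of the wrap-around case raised above (plain C, numbers
chosen only for illustration): once last_fence wraps past UINT32_MAX, a
fence submitted just before the wrap is still perfectly valid, yet the
plain '>' comparison rejects it.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t last_fence = 5;		/* queue has wrapped: really 2^32 + 5 submissions */
	uint32_t pending    = 0xfffffffe;	/* valid fence id from just before the wrap */

	/* The check as written in the patch: */
	if (pending > last_fence)
		printf("rejected with EINVAL even though the fence exists\n");

	return 0;
}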


2021-11-15 16:52:13

by Rob Clark

Subject: Re: [PATCH 2/2] drm/msm: Restore error return on invalid fence

On Mon, Nov 15, 2021 at 6:43 AM Akhil P Oommen <[email protected]> wrote:
>
> On 11/12/2021 12:54 AM, Rob Clark wrote:
> > From: Rob Clark <[email protected]>
> >
> > When converting to use an idr to map userspace fence seqno values back
> > to a dma_fence, we lost the error return when userspace passes a seqno
> > that is larger than the last submitted fence. Restore this check.
> >
> > Reported-by: Akhil P Oommen <[email protected]>
> > Fixes: a61acbbe9cf8 ("drm/msm: Track "seqno" fences by idr")
> > Signed-off-by: Rob Clark <[email protected]>
> > ---
> > Note: I will rebase "drm/msm: Handle fence rollover" on top of this,
> > to simplify backporting this patch to stable kernels
> >
> > drivers/gpu/drm/msm/msm_drv.c | 6 ++++++
> > drivers/gpu/drm/msm/msm_gem_submit.c | 1 +
> > drivers/gpu/drm/msm/msm_gpu.h | 3 +++
> > 3 files changed, 10 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> > index cb14d997c174..56500eb5219e 100644
> > --- a/drivers/gpu/drm/msm/msm_drv.c
> > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > @@ -967,6 +967,12 @@ static int wait_fence(struct msm_gpu_submitqueue *queue, uint32_t fence_id,
> > struct dma_fence *fence;
> > int ret;
> >
> > + if (fence_id > queue->last_fence) {
>
> But fence_id can wrap around and then this check won't be valid.

that is correct, but see my note about rebasing "drm/msm: Handle fence
rollover" on top of this patch, so this patch could be more easily
cherry-picked to stable/lts branches

BR,
-R

> -Akhil.
>
> > + DRM_ERROR_RATELIMITED("waiting on invalid fence: %u (of %u)\n",
> > + fence_id, queue->last_fence);
> > + return -EINVAL;
> > + }
> > +
> > /*
> > * Map submitqueue scoped "seqno" (which is actually an idr key)
> > * back to underlying dma-fence
> > diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> > index 151d19e4453c..a38f23be497d 100644
> > --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> > +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> > @@ -911,6 +911,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > drm_sched_entity_push_job(&submit->base, queue->entity);
> >
> > args->fence = submit->fence_id;
> > + queue->last_fence = submit->fence_id;
> >
> > msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs);
> > msm_process_post_deps(post_deps, args->nr_out_syncobjs,
> > diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> > index bd4e0024033e..e73a5bb03544 100644
> > --- a/drivers/gpu/drm/msm/msm_gpu.h
> > +++ b/drivers/gpu/drm/msm/msm_gpu.h
> > @@ -376,6 +376,8 @@ static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
> > * @ring_nr: the ringbuffer used by this submitqueue, which is determined
> > * by the submitqueue's priority
> > * @faults: the number of GPU hangs associated with this submitqueue
> > + * @last_fence: the sequence number of the last allocated fence (for error
> > + * checking)
> > * @ctx: the per-drm_file context associated with the submitqueue (ie.
> > * which set of pgtables do submits jobs associated with the
> > * submitqueue use)
> > @@ -391,6 +393,7 @@ struct msm_gpu_submitqueue {
> > u32 flags;
> > u32 ring_nr;
> > int faults;
> > + uint32_t last_fence;
> > struct msm_file_private *ctx;
> > struct list_head node;
> > struct idr fence_idr;
> >
>
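
For reference, the usual wrap-tolerant way to write such a comparison is
a signed-difference test, the same idiom as the kernel's time_after().
This is a standalone sketch, not necessarily the exact code of the
rebased "drm/msm: Handle fence rollover" patch:

#include <stdint.h>
#include <stdio.h>

/* True when a is "newer" than b, even across a 32-bit wrap. */
static int fence_after(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

int main(void)
{
	/* Same numbers as the earlier sketch: pre-wrap fence id vs wrapped last_fence. */
	printf("%d\n", fence_after(0xfffffffe, 5));	/* 0: not newer, so not rejected */
	printf("%d\n", fence_after(10, 5));		/* 1: genuinely newer than anything submitted */
	return 0;
}

With a helper like that, the rejection in wait_fence() could become
"if (fence_after(fence_id, queue->last_fence)) return -EINVAL;" and stay
correct across rollover.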

2021-11-16 06:00:04

by Akhil P Oommen

Subject: Re: [PATCH 2/2] drm/msm: Restore error return on invalid fence

On 11/15/2021 10:26 PM, Rob Clark wrote:
> On Mon, Nov 15, 2021 at 6:43 AM Akhil P Oommen <[email protected]> wrote:
>>
>> On 11/12/2021 12:54 AM, Rob Clark wrote:
>>> From: Rob Clark <[email protected]>
>>>
>>> When converting to use an idr to map userspace fence seqno values back
>>> to a dma_fence, we lost the error return when userspace passes a seqno
>>> that is larger than the last submitted fence. Restore this check.
>>>
>>> Reported-by: Akhil P Oommen <[email protected]>
>>> Fixes: a61acbbe9cf8 ("drm/msm: Track "seqno" fences by idr")
>>> Signed-off-by: Rob Clark <[email protected]>
>>> ---
>>> Note: I will rebase "drm/msm: Handle fence rollover" on top of this,
>>> to simplify backporting this patch to stable kernels
>>>
>>> drivers/gpu/drm/msm/msm_drv.c | 6 ++++++
>>> drivers/gpu/drm/msm/msm_gem_submit.c | 1 +
>>> drivers/gpu/drm/msm/msm_gpu.h | 3 +++
>>> 3 files changed, 10 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
>>> index cb14d997c174..56500eb5219e 100644
>>> --- a/drivers/gpu/drm/msm/msm_drv.c
>>> +++ b/drivers/gpu/drm/msm/msm_drv.c
>>> @@ -967,6 +967,12 @@ static int wait_fence(struct msm_gpu_submitqueue *queue, uint32_t fence_id,
>>> struct dma_fence *fence;
>>> int ret;
>>>
>>> + if (fence_id > queue->last_fence) {
>>
>> But fence_id can wrap around and then this check won't be valid.
>
> that is correct, but see my note about rebasing "drm/msm: Handle fence
> rollover" on top of this patch, so this patch could be more easily
> cherry-picked to stable/lts branches
>
> BR,
> -R

Missed that. Thanks.

Reviewed-by: Akhil P Oommen <[email protected]>

-Akhil.
>
>> -Akhil.
>>
>>> + DRM_ERROR_RATELIMITED("waiting on invalid fence: %u (of %u)\n",
>>> + fence_id, queue->last_fence);
>>> + return -EINVAL;
>>> + }
>>> +
>>> /*
>>> * Map submitqueue scoped "seqno" (which is actually an idr key)
>>> * back to underlying dma-fence
>>> diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
>>> index 151d19e4453c..a38f23be497d 100644
>>> --- a/drivers/gpu/drm/msm/msm_gem_submit.c
>>> +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
>>> @@ -911,6 +911,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
>>> drm_sched_entity_push_job(&submit->base, queue->entity);
>>>
>>> args->fence = submit->fence_id;
>>> + queue->last_fence = submit->fence_id;
>>>
>>> msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs);
>>> msm_process_post_deps(post_deps, args->nr_out_syncobjs,
>>> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
>>> index bd4e0024033e..e73a5bb03544 100644
>>> --- a/drivers/gpu/drm/msm/msm_gpu.h
>>> +++ b/drivers/gpu/drm/msm/msm_gpu.h
>>> @@ -376,6 +376,8 @@ static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
>>> * @ring_nr: the ringbuffer used by this submitqueue, which is determined
>>> * by the submitqueue's priority
>>> * @faults: the number of GPU hangs associated with this submitqueue
>>> + * @last_fence: the sequence number of the last allocated fence (for error
>>> + * checking)
>>> * @ctx: the per-drm_file context associated with the submitqueue (ie.
>>> * which set of pgtables do submits jobs associated with the
>>> * submitqueue use)
>>> @@ -391,6 +393,7 @@ struct msm_gpu_submitqueue {
>>> u32 flags;
>>> u32 ring_nr;
>>> int faults;
>>> + uint32_t last_fence;
>>> struct msm_file_private *ctx;
>>> struct list_head node;
>>> struct idr fence_idr;
>>>
>>