2018-02-20 13:00:49

by Christian König

[permalink] [raw]
Subject: [PATCH 1/4] locking/ww_mutex: add ww_mutex_is_owned_by function v3

amdgpu needs to verify if userspace sends us valid addresses and the simplest
way of doing this is to check if the buffer object is locked with the ticket
of the current submission.

Clean up the access to the ww_mutex internals by providing a function
for this and extend the check to the thread owning the underlying mutex.

v2: split amdgpu changes into separate patch as suggested by Alex
v3: change logic as suggested by Daniel

Signed-off-by: Christian König <[email protected]>
---
include/linux/ww_mutex.h | 17 +++++++++++++++++
1 file changed, 17 insertions(+)

diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index 39fda195bf78..14e4149d3d9d 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -358,4 +358,21 @@ static inline bool ww_mutex_is_locked(struct ww_mutex *lock)
return mutex_is_locked(&lock->base);
}

+/**
+ * ww_mutex_is_owned_by - is the w/w mutex locked by this task in that context
+ * @lock: the mutex to be queried
+ * @ctx: the w/w acquire context to test
+ *
+ * If @ctx is not NULL test if the mutex is owned by this context.
+ * If @ctx is NULL test if the mutex is owned by the current thread.
+ */
+static inline bool ww_mutex_is_owned_by(struct ww_mutex *lock,
+ struct ww_acquire_ctx *ctx)
+{
+ if (ctx)
+ return likely(READ_ONCE(lock->ctx) == ctx);
+ else
+ return likely(__mutex_owner(&lock->base) == current);
+}
+
#endif
--
2.14.1



2018-02-20 12:59:30

by Christian König

[permalink] [raw]
Subject: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and swapout.

This solves the problem that when we swapout a BO from a domain we
sometimes couldn't make room for it because holding the lock blocks all
other BOs with this reservation object.

Signed-off-by: Christian König <[email protected]>
---
drivers/gpu/drm/ttm/ttm_bo.c | 33 ++++++++++++++++-----------------
1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index d90b1cf10b27..3a44c2ee4155 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -713,31 +713,30 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
EXPORT_SYMBOL(ttm_bo_eviction_valuable);

/**
- * Check the target bo is allowable to be evicted or swapout, including cases:
- *
- * a. if share same reservation object with ctx->resv, have assumption
- * reservation objects should already be locked, so not lock again and
- * return true directly when either the opreation allow_reserved_eviction
- * or the target bo already is in delayed free list;
- *
- * b. Otherwise, trylock it.
+ * Check if the target bo is allowed to be evicted or swapedout.
*/
static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
- struct ttm_operation_ctx *ctx, bool *locked)
+ struct ttm_operation_ctx *ctx,
+ bool *locked)
{
- bool ret = false;
+ /* First check if we can lock it */
+ *locked = reservation_object_trylock(bo->resv);
+ if (*locked)
+ return true;

- *locked = false;
+ /* Check if it's locked because it is part of the current operation */
if (bo->resv == ctx->resv) {
reservation_object_assert_held(bo->resv);
- if (ctx->allow_reserved_eviction || !list_empty(&bo->ddestroy))
- ret = true;
- } else {
- *locked = reservation_object_trylock(bo->resv);
- ret = *locked;
+ return ctx->allow_reserved_eviction ||
+ !list_empty(&bo->ddestroy);
}

- return ret;
+ /* Check if it's locked because it was already evicted */
+ if (ww_mutex_is_owned_by(&bo->resv->lock, NULL))
+ return true;
+
+ /* Some other thread is using it, don't touch it */
+ return false;
}

static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
--
2.14.1


2018-02-20 13:00:21

by Christian König

[permalink] [raw]
Subject: [PATCH 4/4] drm/ttm: keep BOs reserved until end of eviction

This avoids problems when BOs are evicted but directly moved back into
the domain from other threads.

Signed-off-by: Christian König <[email protected]>
---
drivers/gpu/drm/ttm/ttm_bo.c | 37 +++++++++++++++++++++++++++++--------
1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3a44c2ee4155..593a0216faff 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -742,7 +742,8 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
uint32_t mem_type,
const struct ttm_place *place,
- struct ttm_operation_ctx *ctx)
+ struct ttm_operation_ctx *ctx,
+ struct list_head *evicted)
{
struct ttm_bo_global *glob = bdev->glob;
struct ttm_mem_type_manager *man = &bdev->man[mem_type];
@@ -792,17 +793,28 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,

ret = ttm_bo_evict(bo, ctx);
if (locked) {
- ttm_bo_unreserve(bo);
+ list_add_tail(&bo->lru, evicted);
} else {
spin_lock(&glob->lru_lock);
ttm_bo_add_to_lru(bo);
spin_unlock(&glob->lru_lock);
+ kref_put(&bo->list_kref, ttm_bo_release_list);
}

- kref_put(&bo->list_kref, ttm_bo_release_list);
return ret;
}

+static void ttm_mem_evict_cleanup(struct list_head *evicted)
+{
+ struct ttm_buffer_object *bo, *tmp;
+
+ list_for_each_entry_safe(bo, tmp, evicted, lru) {
+ list_del_init(&bo->lru);
+ ttm_bo_unreserve(bo);
+ kref_put(&bo->list_kref, ttm_bo_release_list);
+ }
+}
+
void ttm_bo_mem_put(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem)
{
struct ttm_mem_type_manager *man = &bo->bdev->man[mem->mem_type];
@@ -852,20 +864,26 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
{
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+ struct list_head evicted;
int ret;

+ INIT_LIST_HEAD(&evicted);
do {
ret = (*man->func->get_node)(man, bo, place, mem);
if (unlikely(ret != 0))
return ret;
if (mem->mm_node)
break;
- ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
+ ret = ttm_mem_evict_first(bdev, mem_type, place, ctx, &evicted);
if (unlikely(ret != 0))
- return ret;
+ goto error;
} while (1);
mem->mem_type = mem_type;
- return ttm_bo_add_move_fence(bo, man, mem);
+ ret = ttm_bo_add_move_fence(bo, man, mem);
+
+error:
+ ttm_mem_evict_cleanup(&evicted);
+ return ret;
}

static uint32_t ttm_bo_select_caching(struct ttm_mem_type_manager *man,
@@ -1345,6 +1363,7 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
struct ttm_operation_ctx ctx = { false, false };
struct ttm_mem_type_manager *man = &bdev->man[mem_type];
struct ttm_bo_global *glob = bdev->glob;
+ struct list_head evicted;
struct dma_fence *fence;
int ret;
unsigned i;
@@ -1352,18 +1371,20 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
/*
* Can't use standard list traversal since we're unlocking.
*/
-
+ INIT_LIST_HEAD(&evicted);
spin_lock(&glob->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
while (!list_empty(&man->lru[i])) {
spin_unlock(&glob->lru_lock);
- ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
+ ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx,
+ &evicted);
if (ret)
return ret;
spin_lock(&glob->lru_lock);
}
}
spin_unlock(&glob->lru_lock);
+ ttm_mem_evict_cleanup(&evicted);

spin_lock(&man->move_lock);
fence = dma_fence_get(man->move);
--
2.14.1


2018-02-20 13:02:03

by Christian König

[permalink] [raw]
Subject: [PATCH 2/4] drm/amdgpu: use new ww_mutex_is_owned_by function

Instead of accessing ww_mutex internals directly use the provided
function to check if the ww_mutex was indeed locked by the current
command submission.

Signed-off-by: Christian König <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index eaa3cb0c3ad1..6db26a3144dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1594,7 +1594,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
*map = mapping;

/* Double check that the BO is reserved by this CS */
- if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
+ if (!ww_mutex_is_owned_by(&(*bo)->tbo.resv->lock, &parser->ticket))
return -EINVAL;

if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
--
2.14.1


2018-02-20 13:13:54

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 1/4] locking/ww_mutex: add ww_mutex_is_owned_by function v3

On Tue, Feb 20, 2018 at 01:58:26PM +0100, Christian K?nig wrote:
> amdgpu needs to verify if userspace sends us valid addresses and the simplest
> way of doing this is to check if the buffer object is locked with the ticket
> of the current submission.
>
> Clean up the access to the ww_mutex internals by providing a function
> for this and extend the check to the thread owning the underlying mutex.

> Signed-off-by: Christian K?nig <[email protected]>

Much thanks for Cc'ing the relevant maintainers :/

> ---
> include/linux/ww_mutex.h | 17 +++++++++++++++++
> 1 file changed, 17 insertions(+)
>
> diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
> index 39fda195bf78..14e4149d3d9d 100644
> --- a/include/linux/ww_mutex.h
> +++ b/include/linux/ww_mutex.h
> @@ -358,4 +358,21 @@ static inline bool ww_mutex_is_locked(struct ww_mutex *lock)
> return mutex_is_locked(&lock->base);
> }
>
> +/**
> + * ww_mutex_is_owned_by - is the w/w mutex locked by this task in that context
> + * @lock: the mutex to be queried
> + * @ctx: the w/w acquire context to test
> + *
> + * If @ctx is not NULL test if the mutex is owned by this context.
> + * If @ctx is NULL test if the mutex is owned by the current thread.
> + */
> +static inline bool ww_mutex_is_owned_by(struct ww_mutex *lock,
> + struct ww_acquire_ctx *ctx)
> +{
> + if (ctx)
> + return likely(READ_ONCE(lock->ctx) == ctx);
> + else
> + return likely(__mutex_owner(&lock->base) == current);
> +}

Much better than the previous version. If you want to bike-shed, you can
leave out the 'else' and unindent the last line.

I do worry about potential users of .ctx = NULL, though. It makes it far
too easy to do recursive locking, which is something we should strongly
discourage.

2018-02-20 13:27:59

by Christian König

[permalink] [raw]
Subject: Re: [PATCH 1/4] locking/ww_mutex: add ww_mutex_is_owned_by function v3

Am 20.02.2018 um 14:12 schrieb Peter Zijlstra:
> On Tue, Feb 20, 2018 at 01:58:26PM +0100, Christian König wrote:
>> amdgpu needs to verify if userspace sends us valid addresses and the simplest
>> way of doing this is to check if the buffer object is locked with the ticket
>> of the current submission.
>>
>> Clean up the access to the ww_mutex internals by providing a function
>> for this and extend the check to the thread owning the underlying mutex.
>> Signed-off-by: Christian König <[email protected]>
> Much thanks for Cc'ing the relevant maintainers :/

Sorry for that.

>> ---
>> include/linux/ww_mutex.h | 17 +++++++++++++++++
>> 1 file changed, 17 insertions(+)
>>
>> diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
>> index 39fda195bf78..14e4149d3d9d 100644
>> --- a/include/linux/ww_mutex.h
>> +++ b/include/linux/ww_mutex.h
>> @@ -358,4 +358,21 @@ static inline bool ww_mutex_is_locked(struct ww_mutex *lock)
>> return mutex_is_locked(&lock->base);
>> }
>>
>> +/**
>> + * ww_mutex_is_owned_by - is the w/w mutex locked by this task in that context
>> + * @lock: the mutex to be queried
>> + * @ctx: the w/w acquire context to test
>> + *
>> + * If @ctx is not NULL test if the mutex is owned by this context.
>> + * If @ctx is NULL test if the mutex is owned by the current thread.
>> + */
>> +static inline bool ww_mutex_is_owned_by(struct ww_mutex *lock,
>> + struct ww_acquire_ctx *ctx)
>> +{
>> + if (ctx)
>> + return likely(READ_ONCE(lock->ctx) == ctx);
>> + else
>> + return likely(__mutex_owner(&lock->base) == current);
>> +}
> Much better than the previous version. If you want to bike-shed, you can
> leave out the 'else' and unindent the last line.

Thanks for the suggestion, going to do this.

> I do worry about potential users of .ctx = NULL, though. It makes it far
> too easy to do recursive locking, which is something we should strongly
> discourage.

Well, one of the addressed use cases is indeed checking for recursive
locking. But recursive locking is something rather normal for ww_mutex
and we are just exercising an existing code path.

E.g. the most common use case for the ww_mutex is in the graphics
drivers where usespace sends us a list of buffer objects to work with.

Now when userspace sends us duplicates in that buffer list the
expectation is to get -EALREADY from ww_mutex_lock when we try to lock
the same ww_mutex twice.

Depending on the driver this then results in returning an error code to
userspace or just ignoring the duplicate (because of backward
compatibility).


The intention behind this function is now to a) be able to extend those
checks to make sure user space doesn't sends us potentially harmful
nonsense and b) allow to check for recursion in TTM during buffer object
eviction which uses ww_mutex_trylock instead of ww_mutex_lock.

Regards,
Christian.

2018-02-20 13:58:10

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 1/4] locking/ww_mutex: add ww_mutex_is_owned_by function v3

On Tue, Feb 20, 2018 at 02:26:55PM +0100, Christian K?nig wrote:
> > > +static inline bool ww_mutex_is_owned_by(struct ww_mutex *lock,
> > > + struct ww_acquire_ctx *ctx)
> > > +{
> > > + if (ctx)
> > > + return likely(READ_ONCE(lock->ctx) == ctx);
> > > + else
> > > + return likely(__mutex_owner(&lock->base) == current);
> > > +}
> > Much better than the previous version. If you want to bike-shed, you can
> > leave out the 'else' and unindent the last line.
>
> Thanks for the suggestion, going to do this.

You might also want likely(ctx), since ww_mutex without ctx is
a-typical I would think.

> > I do worry about potential users of .ctx = NULL, though. It makes it far
> > too easy to do recursive locking, which is something we should strongly
> > discourage.
>
> Well, one of the addressed use cases is indeed checking for recursive
> locking. But recursive locking is something rather normal for ww_mutex and
> we are just exercising an existing code path.

But that would be the ctx case, right? I'm not sure there is a lot of
!ctx use out there, and in that case it really is rather like a normal
mutex.

> E.g. the most common use case for the ww_mutex is in the graphics drivers
> where usespace sends us a list of buffer objects to work with.
>
> Now when userspace sends us duplicates in that buffer list the expectation
> is to get -EALREADY from ww_mutex_lock when we try to lock the same ww_mutex
> twice.

Right, I remember that much.. :-)

> The intention behind this function is now to a) be able to extend those
> checks to make sure user space doesn't sends us potentially harmful nonsense
> and b) allow to check for recursion in TTM during buffer object eviction
> which uses ww_mutex_trylock instead of ww_mutex_lock.

OK, but neither case would in fact need the !ctx case right? That's just
there for completeness sake?

But yes, I cannot think of a better fallback there either.


2018-02-20 14:05:29

by Daniel Vetter

[permalink] [raw]
Subject: Re: [PATCH 1/4] locking/ww_mutex: add ww_mutex_is_owned_by function v3

On Tue, Feb 20, 2018 at 01:58:26PM +0100, Christian K?nig wrote:
> amdgpu needs to verify if userspace sends us valid addresses and the simplest
> way of doing this is to check if the buffer object is locked with the ticket
> of the current submission.
>
> Clean up the access to the ww_mutex internals by providing a function
> for this and extend the check to the thread owning the underlying mutex.
>
> v2: split amdgpu changes into separate patch as suggested by Alex
> v3: change logic as suggested by Daniel
>
> Signed-off-by: Christian K?nig <[email protected]>
> ---
> include/linux/ww_mutex.h | 17 +++++++++++++++++
> 1 file changed, 17 insertions(+)
>
> diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
> index 39fda195bf78..14e4149d3d9d 100644
> --- a/include/linux/ww_mutex.h
> +++ b/include/linux/ww_mutex.h
> @@ -358,4 +358,21 @@ static inline bool ww_mutex_is_locked(struct ww_mutex *lock)
> return mutex_is_locked(&lock->base);
> }
>
> +/**
> + * ww_mutex_is_owned_by - is the w/w mutex locked by this task in that context
> + * @lock: the mutex to be queried
> + * @ctx: the w/w acquire context to test
> + *
> + * If @ctx is not NULL test if the mutex is owned by this context.
> + * If @ctx is NULL test if the mutex is owned by the current thread.

This is a bit much how and not so much why, but I couldn't come up with a
concise text what was materially better.

Reviewed-by: Daniel Vetter <[email protected]>

> + */
> +static inline bool ww_mutex_is_owned_by(struct ww_mutex *lock,
> + struct ww_acquire_ctx *ctx)
> +{
> + if (ctx)
> + return likely(READ_ONCE(lock->ctx) == ctx);
> + else
> + return likely(__mutex_owner(&lock->base) == current);
> +}
> +
> #endif
> --
> 2.14.1
>
> _______________________________________________
> dri-devel mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

2018-02-20 14:36:25

by Christian König

[permalink] [raw]
Subject: Re: [PATCH 1/4] locking/ww_mutex: add ww_mutex_is_owned_by function v3

Am 20.02.2018 um 14:57 schrieb Peter Zijlstra:
> On Tue, Feb 20, 2018 at 02:26:55PM +0100, Christian König wrote:
>>>> +static inline bool ww_mutex_is_owned_by(struct ww_mutex *lock,
>>>> + struct ww_acquire_ctx *ctx)
>>>> +{
>>>> + if (ctx)
>>>> + return likely(READ_ONCE(lock->ctx) == ctx);
>>>> + else
>>>> + return likely(__mutex_owner(&lock->base) == current);
>>>> +}
>>> Much better than the previous version. If you want to bike-shed, you can
>>> leave out the 'else' and unindent the last line.
>> Thanks for the suggestion, going to do this.
> You might also want likely(ctx), since ww_mutex without ctx is
> a-typical I would think.
>
>>> I do worry about potential users of .ctx = NULL, though. It makes it far
>>> too easy to do recursive locking, which is something we should strongly
>>> discourage.
>> Well, one of the addressed use cases is indeed checking for recursive
>> locking. But recursive locking is something rather normal for ww_mutex and
>> we are just exercising an existing code path.
> But that would be the ctx case, right? I'm not sure there is a lot of
> !ctx use out there, and in that case it really is rather like a normal
> mutex.
>
>> E.g. the most common use case for the ww_mutex is in the graphics drivers
>> where usespace sends us a list of buffer objects to work with.
>>
>> Now when userspace sends us duplicates in that buffer list the expectation
>> is to get -EALREADY from ww_mutex_lock when we try to lock the same ww_mutex
>> twice.
> Right, I remember that much.. :-)
>
>> The intention behind this function is now to a) be able to extend those
>> checks to make sure user space doesn't sends us potentially harmful nonsense
>> and b) allow to check for recursion in TTM during buffer object eviction
>> which uses ww_mutex_trylock instead of ww_mutex_lock.
> OK, but neither case would in fact need the !ctx case right? That's just
> there for completeness sake?

Unfortunately not. TTM uses trylock to lock BOs which are about to be
evicted to make room for all the BOs locked with a ctx.

I need to be able to distinct between the BOs which are trylocked and
those which are locked with a ctx.

Writing this I actually noticed the current version is buggy, cause even
when we check the mutex owner we still need to make sure that the ctx in
the lock is NULL.

Time for v4 of the patch,
Christian.

>
> But yes, I cannot think of a better fallback there either.
>


2018-02-20 14:55:21

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 1/4] locking/ww_mutex: add ww_mutex_is_owned_by function v3

On Tue, Feb 20, 2018 at 03:34:07PM +0100, Christian K?nig wrote:
> > OK, but neither case would in fact need the !ctx case right? That's just
> > there for completeness sake?
>
> Unfortunately not. TTM uses trylock to lock BOs which are about to be
> evicted to make room for all the BOs locked with a ctx.
>
> I need to be able to distinct between the BOs which are trylocked and those
> which are locked with a ctx.
>
> Writing this I actually noticed the current version is buggy, cause even
> when we check the mutex owner we still need to make sure that the ctx in the
> lock is NULL.

Hurm... I can't remember why trylocks behave like that, and it seems
rather unfortunate / inconsistent.

Chris, Maarten, do either one of you remember?

I'm thinking that if we do acquire the trylock, the thing should join
the ctx such that a subsequent contending mutex_lock() can ww right.

2018-02-20 15:06:50

by Christian König

[permalink] [raw]
Subject: Re: [PATCH 1/4] locking/ww_mutex: add ww_mutex_is_owned_by function v3

Am 20.02.2018 um 15:54 schrieb Peter Zijlstra:
> On Tue, Feb 20, 2018 at 03:34:07PM +0100, Christian König wrote:
>>> OK, but neither case would in fact need the !ctx case right? That's just
>>> there for completeness sake?
>> Unfortunately not. TTM uses trylock to lock BOs which are about to be
>> evicted to make room for all the BOs locked with a ctx.
>>
>> I need to be able to distinct between the BOs which are trylocked and those
>> which are locked with a ctx.
>>
>> Writing this I actually noticed the current version is buggy, cause even
>> when we check the mutex owner we still need to make sure that the ctx in the
>> lock is NULL.
> Hurm... I can't remember why trylocks behave like that, and it seems
> rather unfortunate / inconsistent.

Actually for me that is rather fortunate, cause I need to distinct
between the locks acquired through trylock and lock.

In other words when the amdgpu does it's checking if userspace sends
nonsense to the kernel it should only get a green signal when the lock
was intentionally locked using the context.

If the lock was just taken because TTM trylocked it because TTM had to
evict the BO to make room then the check should fail and we need to tell
userspace that it did something wrong.

Regards,
Christian.

> Chris, Maarten, do either one of you remember?
>
> I'm thinking that if we do acquire the trylock, the thing should join
> the ctx such that a subsequent contending mutex_lock() can ww right.
> _______________________________________________
> dri-devel mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/dri-devel


2018-02-20 15:23:10

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 1/4] locking/ww_mutex: add ww_mutex_is_owned_by function v3

On Tue, Feb 20, 2018 at 04:05:49PM +0100, Christian K?nig wrote:
> Am 20.02.2018 um 15:54 schrieb Peter Zijlstra:
> > On Tue, Feb 20, 2018 at 03:34:07PM +0100, Christian K?nig wrote:
> > > > OK, but neither case would in fact need the !ctx case right? That's just
> > > > there for completeness sake?
> > > Unfortunately not. TTM uses trylock to lock BOs which are about to be
> > > evicted to make room for all the BOs locked with a ctx.
> > >
> > > I need to be able to distinct between the BOs which are trylocked and those
> > > which are locked with a ctx.
> > >
> > > Writing this I actually noticed the current version is buggy, cause even
> > > when we check the mutex owner we still need to make sure that the ctx in the
> > > lock is NULL.
> > Hurm... I can't remember why trylocks behave like that, and it seems
> > rather unfortunate / inconsistent.
>
> Actually for me that is rather fortunate, cause I need to distinct between
> the locks acquired through trylock and lock.

I suppose that would always be possible using:
ww_mutex_trylock(.ctx=NULL), and it could be that there simply weren't
any immediate uses for a !NULL trylock and it was thus not implemented.

But that is all very long ago..


2018-02-20 23:59:25

by Daniel Vetter

[permalink] [raw]
Subject: Re: [PATCH 1/4] locking/ww_mutex: add ww_mutex_is_owned_by function v3

On Tue, Feb 20, 2018 at 04:21:58PM +0100, Peter Zijlstra wrote:
> On Tue, Feb 20, 2018 at 04:05:49PM +0100, Christian K?nig wrote:
> > Am 20.02.2018 um 15:54 schrieb Peter Zijlstra:
> > > On Tue, Feb 20, 2018 at 03:34:07PM +0100, Christian K?nig wrote:
> > > > > OK, but neither case would in fact need the !ctx case right? That's just
> > > > > there for completeness sake?
> > > > Unfortunately not. TTM uses trylock to lock BOs which are about to be
> > > > evicted to make room for all the BOs locked with a ctx.
> > > >
> > > > I need to be able to distinct between the BOs which are trylocked and those
> > > > which are locked with a ctx.
> > > >
> > > > Writing this I actually noticed the current version is buggy, cause even
> > > > when we check the mutex owner we still need to make sure that the ctx in the
> > > > lock is NULL.
> > > Hurm... I can't remember why trylocks behave like that, and it seems
> > > rather unfortunate / inconsistent.
> >
> > Actually for me that is rather fortunate, cause I need to distinct between
> > the locks acquired through trylock and lock.
>
> I suppose that would always be possible using:
> ww_mutex_trylock(.ctx=NULL), and it could be that there simply weren't
> any immediate uses for a !NULL trylock and it was thus not implemented.
>
> But that is all very long ago..

I think we simple never had a use-case for interleaving ww_mutex_lock(ctx)
and ww_mutex_trylock(ctx). Nesting multiple trylocks in ctx-locks happens
plenty, but not further:

The common use-case for that is locking a bunch of buffers you need (for
command submission or whatever), and then trylocking other buffers to make
space for the buffers you need to move into VRAM. I guess if only
trylocking buffers doesn't succeed in freeing up enough VRAM then we could
go into blocking ww_mutex_locks which need the ctx (and which would need
all the trylock-acquired buffers to be annotated with the ctx too). TTM
currently tries to be far enough away from that corner case (using a
defensive "never use more than 50% of all memory for gfx" approach) that
it doesn't seem to need that.

Once we get there it should indeed be simply to add a ctx parameter to
ww_mutex_trylock to fix this case. The TTM side rework is definitely going
to be the much bigger issue here ...
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

2018-02-21 10:55:35

by Maarten Lankhorst

[permalink] [raw]
Subject: Re: [PATCH 1/4] locking/ww_mutex: add ww_mutex_is_owned_by function v3

Op 21-02-18 om 00:56 schreef Daniel Vetter:
> On Tue, Feb 20, 2018 at 04:21:58PM +0100, Peter Zijlstra wrote:
>> On Tue, Feb 20, 2018 at 04:05:49PM +0100, Christian König wrote:
>>> Am 20.02.2018 um 15:54 schrieb Peter Zijlstra:
>>>> On Tue, Feb 20, 2018 at 03:34:07PM +0100, Christian König wrote:
>>>>>> OK, but neither case would in fact need the !ctx case right? That's just
>>>>>> there for completeness sake?
>>>>> Unfortunately not. TTM uses trylock to lock BOs which are about to be
>>>>> evicted to make room for all the BOs locked with a ctx.
>>>>>
>>>>> I need to be able to distinct between the BOs which are trylocked and those
>>>>> which are locked with a ctx.
>>>>>
>>>>> Writing this I actually noticed the current version is buggy, cause even
>>>>> when we check the mutex owner we still need to make sure that the ctx in the
>>>>> lock is NULL.
>>>> Hurm... I can't remember why trylocks behave like that, and it seems
>>>> rather unfortunate / inconsistent.
>>> Actually for me that is rather fortunate, cause I need to distinct between
>>> the locks acquired through trylock and lock.
>> I suppose that would always be possible using:
>> ww_mutex_trylock(.ctx=NULL), and it could be that there simply weren't
>> any immediate uses for a !NULL trylock and it was thus not implemented.
>>
>> But that is all very long ago..
> I think we simple never had a use-case for interleaving ww_mutex_lock(ctx)
> and ww_mutex_trylock(ctx). Nesting multiple trylocks in ctx-locks happens
> plenty, but not further:
>
> The common use-case for that is locking a bunch of buffers you need (for
> command submission or whatever), and then trylocking other buffers to make
> space for the buffers you need to move into VRAM. I guess if only
> trylocking buffers doesn't succeed in freeing up enough VRAM then we could
> go into blocking ww_mutex_locks which need the ctx (and which would need
> all the trylock-acquired buffers to be annotated with the ctx too). TTM
> currently tries to be far enough away from that corner case (using a
> defensive "never use more than 50% of all memory for gfx" approach) that
> it doesn't seem to need that.
>
> Once we get there it should indeed be simply to add a ctx parameter to
> ww_mutex_trylock to fix this case. The TTM side rework is definitely going
> to be the much bigger issue here ...
> -Daniel

Yes, I think fixing trylock to take a ctx parameter would be a better fix than ww_mutex_is_owned_by..


2018-02-21 11:51:17

by Christian König

[permalink] [raw]
Subject: Re: [PATCH 1/4] locking/ww_mutex: add ww_mutex_is_owned_by function v3

Am 21.02.2018 um 11:54 schrieb Maarten Lankhorst:
> Op 21-02-18 om 00:56 schreef Daniel Vetter:
>> On Tue, Feb 20, 2018 at 04:21:58PM +0100, Peter Zijlstra wrote:
>>> On Tue, Feb 20, 2018 at 04:05:49PM +0100, Christian König wrote:
>>>> Am 20.02.2018 um 15:54 schrieb Peter Zijlstra:
>>>>> On Tue, Feb 20, 2018 at 03:34:07PM +0100, Christian König wrote:
>>>>>>> OK, but neither case would in fact need the !ctx case right? That's just
>>>>>>> there for completeness sake?
>>>>>> Unfortunately not. TTM uses trylock to lock BOs which are about to be
>>>>>> evicted to make room for all the BOs locked with a ctx.
>>>>>>
>>>>>> I need to be able to distinct between the BOs which are trylocked and those
>>>>>> which are locked with a ctx.
>>>>>>
>>>>>> Writing this I actually noticed the current version is buggy, cause even
>>>>>> when we check the mutex owner we still need to make sure that the ctx in the
>>>>>> lock is NULL.
>>>>> Hurm... I can't remember why trylocks behave like that, and it seems
>>>>> rather unfortunate / inconsistent.
>>>> Actually for me that is rather fortunate, cause I need to distinct between
>>>> the locks acquired through trylock and lock.
>>> I suppose that would always be possible using:
>>> ww_mutex_trylock(.ctx=NULL), and it could be that there simply weren't
>>> any immediate uses for a !NULL trylock and it was thus not implemented.
>>>
>>> But that is all very long ago..
>> I think we simple never had a use-case for interleaving ww_mutex_lock(ctx)
>> and ww_mutex_trylock(ctx). Nesting multiple trylocks in ctx-locks happens
>> plenty, but not further:
>>
>> The common use-case for that is locking a bunch of buffers you need (for
>> command submission or whatever), and then trylocking other buffers to make
>> space for the buffers you need to move into VRAM. I guess if only
>> trylocking buffers doesn't succeed in freeing up enough VRAM then we could
>> go into blocking ww_mutex_locks which need the ctx (and which would need
>> all the trylock-acquired buffers to be annotated with the ctx too). TTM
>> currently tries to be far enough away from that corner case (using a
>> defensive "never use more than 50% of all memory for gfx" approach) that
>> it doesn't seem to need that.
>>
>> Once we get there it should indeed be simply to add a ctx parameter to
>> ww_mutex_trylock to fix this case. The TTM side rework is definitely going
>> to be the much bigger issue here ...
>> -Daniel
> Yes, I think fixing trylock to take a ctx parameter would be a better fix than ww_mutex_is_owned_by..

Yeah, but as I noted now multiple times that won't work.

See I need to distinct between the BOs acquired with and without a
context. Otherwise the whole approach doesn't make much sense.

Christian.

2018-02-21 21:11:39

by Emil Velikov

[permalink] [raw]
Subject: Re: [PATCH 1/4] locking/ww_mutex: add ww_mutex_is_owned_by function v3

On 20 February 2018 at 13:12, Peter Zijlstra <[email protected]> wrote:
> On Tue, Feb 20, 2018 at 01:58:26PM +0100, Christian König wrote:
>> amdgpu needs to verify if userspace sends us valid addresses and the simplest
>> way of doing this is to check if the buffer object is locked with the ticket
>> of the current submission.
>>
>> Clean up the access to the ww_mutex internals by providing a function
>> for this and extend the check to the thread owning the underlying mutex.
>
>> Signed-off-by: Christian König <[email protected]>
>
> Much thanks for Cc'ing the relevant maintainers :/
>
Doubt it's intentional. The get-maintainer script seems confused and
lists no maintainers?

$ ./scripts/get_maintainer.pl include/linux/ww_mutex.h
[email protected] (open list)

While the normal mutex header works fine.

$ ./scripts/get_maintainer.pl include/linux/mutex.h
Peter Zijlstra <[email protected]> (maintainer:LOCKING PRIMITIVES)
Ingo Molnar <[email protected]> (maintainer:LOCKING PRIMITIVES)
[email protected] (open list:LOCKING PRIMITIVES)

HTH
Emil

2018-02-23 09:30:24

by He, Roger

[permalink] [raw]
Subject: RE: [PATCH 4/4] drm/ttm: keep BOs reserved until end of eviction

looks good to me. Reviewed-by: Roger He <[email protected]>

-----Original Message-----
From: dri-devel [mailto:[email protected]] On Behalf Of Christian K?nig
Sent: Tuesday, February 20, 2018 8:58 PM
To: [email protected]; [email protected]; [email protected]
Subject: [PATCH 4/4] drm/ttm: keep BOs reserved until end of eviction

This avoids problems when BOs are evicted but directly moved back into the domain from other threads.

Signed-off-by: Christian König <[email protected]>
---
drivers/gpu/drm/ttm/ttm_bo.c | 37 +++++++++++++++++++++++++++++--------
1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3a44c2ee4155..593a0216faff 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -742,7 +742,8 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo, static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
uint32_t mem_type,
const struct ttm_place *place,
- struct ttm_operation_ctx *ctx)
+ struct ttm_operation_ctx *ctx,
+ struct list_head *evicted)
{
struct ttm_bo_global *glob = bdev->glob;
struct ttm_mem_type_manager *man = &bdev->man[mem_type]; @@ -792,17 +793,28 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,

ret = ttm_bo_evict(bo, ctx);
if (locked) {
- ttm_bo_unreserve(bo);
+ list_add_tail(&bo->lru, evicted);
} else {
spin_lock(&glob->lru_lock);
ttm_bo_add_to_lru(bo);
spin_unlock(&glob->lru_lock);
+ kref_put(&bo->list_kref, ttm_bo_release_list);
}

- kref_put(&bo->list_kref, ttm_bo_release_list);
return ret;
}

+static void ttm_mem_evict_cleanup(struct list_head *evicted) {
+ struct ttm_buffer_object *bo, *tmp;
+
+ list_for_each_entry_safe(bo, tmp, evicted, lru) {
+ list_del_init(&bo->lru);
+ ttm_bo_unreserve(bo);
+ kref_put(&bo->list_kref, ttm_bo_release_list);
+ }
+}
+
void ttm_bo_mem_put(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem) {
struct ttm_mem_type_manager *man = &bo->bdev->man[mem->mem_type]; @@ -852,20 +864,26 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo, {
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+ struct list_head evicted;
int ret;

+ INIT_LIST_HEAD(&evicted);
do {
ret = (*man->func->get_node)(man, bo, place, mem);
if (unlikely(ret != 0))
return ret;
if (mem->mm_node)
break;
- ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
+ ret = ttm_mem_evict_first(bdev, mem_type, place, ctx, &evicted);
if (unlikely(ret != 0))
- return ret;
+ goto error;
} while (1);
mem->mem_type = mem_type;
- return ttm_bo_add_move_fence(bo, man, mem);
+ ret = ttm_bo_add_move_fence(bo, man, mem);
+
+error:
+ ttm_mem_evict_cleanup(&evicted);
+ return ret;
}

static uint32_t ttm_bo_select_caching(struct ttm_mem_type_manager *man, @@ -1345,6 +1363,7 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
struct ttm_operation_ctx ctx = { false, false };
struct ttm_mem_type_manager *man = &bdev->man[mem_type];
struct ttm_bo_global *glob = bdev->glob;
+ struct list_head evicted;
struct dma_fence *fence;
int ret;
unsigned i;
@@ -1352,18 +1371,20 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
/*
* Can't use standard list traversal since we're unlocking.
*/
-
+ INIT_LIST_HEAD(&evicted);
spin_lock(&glob->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
while (!list_empty(&man->lru[i])) {
spin_unlock(&glob->lru_lock);
- ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
+ ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx,
+ &evicted);
if (ret)
return ret;
spin_lock(&glob->lru_lock);
}
}
spin_unlock(&glob->lru_lock);
+ ttm_mem_evict_cleanup(&evicted);

spin_lock(&man->move_lock);
fence = dma_fence_get(man->move);
--
2.14.1

_______________________________________________
dri-devel mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/dri-devel

2018-02-23 09:47:58

by He, Roger

[permalink] [raw]
Subject: RE: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and swapout.



-----Original Message-----
From: dri-devel [mailto:[email protected]] On Behalf Of Christian K?nig
Sent: Tuesday, February 20, 2018 8:58 PM
To: [email protected]; [email protected]; [email protected]
Subject: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and swapout.

This solves the problem that when we swapout a BO from a domain we sometimes couldn't make room for it because holding the lock blocks all other BOs with this reservation object.

Signed-off-by: Christian König <[email protected]>
---
drivers/gpu/drm/ttm/ttm_bo.c | 33 ++++++++++++++++-----------------
1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d90b1cf10b27..3a44c2ee4155 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -713,31 +713,30 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, EXPORT_SYMBOL(ttm_bo_eviction_valuable);

/**
- * Check the target bo is allowable to be evicted or swapout, including cases:
- *
- * a. if share same reservation object with ctx->resv, have assumption
- * reservation objects should already be locked, so not lock again and
- * return true directly when either the opreation allow_reserved_eviction
- * or the target bo already is in delayed free list;
- *
- * b. Otherwise, trylock it.
+ * Check if the target bo is allowed to be evicted or swapedout.
*/
static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
- struct ttm_operation_ctx *ctx, bool *locked)
+ struct ttm_operation_ctx *ctx,
+ bool *locked)
{
- bool ret = false;
+ /* First check if we can lock it */
+ *locked = reservation_object_trylock(bo->resv);
+ if (*locked)
+ return true;

- *locked = false;
+ /* Check if it's locked because it is part of the current operation */
if (bo->resv == ctx->resv) {
reservation_object_assert_held(bo->resv);
- if (ctx->allow_reserved_eviction || !list_empty(&bo->ddestroy))
- ret = true;
- } else {
- *locked = reservation_object_trylock(bo->resv);
- ret = *locked;
+ return ctx->allow_reserved_eviction ||
+ !list_empty(&bo->ddestroy);
}

- return ret;
+ /* Check if it's locked because it was already evicted */
+ if (ww_mutex_is_owned_by(&bo->resv->lock, NULL))
+ return true;

For the special case: when command submission with Per-VM-BO enabled,
All BOs a/b/c are always valid BO. After the validation of BOs a and b,
when validation of BO c, is it possible to return true and then evict BO a and b by mistake ?
Because a/b/c share same task_struct.

Thanks
Roger(Hongbo.He)

+ /* Some other thread is using it, don't touch it */
+ return false;
}

static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
--
2.14.1

_______________________________________________
dri-devel mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/dri-devel

2018-02-23 09:49:28

by He, Roger

[permalink] [raw]
Subject: RE: [PATCH 2/4] drm/amdgpu: use new ww_mutex_is_owned_by function


Reviewed-by: Roger He <[email protected]>

-----Original Message-----
From: amd-gfx [mailto:[email protected]] On Behalf Of Christian K?nig
Sent: Tuesday, February 20, 2018 8:58 PM
To: [email protected]; [email protected]; [email protected]
Subject: [PATCH 2/4] drm/amdgpu: use new ww_mutex_is_owned_by function

Instead of accessing ww_mutex internals directly use the provided function to check if the ww_mutex was indeed locked by the current command submission.

Signed-off-by: Christian König <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index eaa3cb0c3ad1..6db26a3144dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1594,7 +1594,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
*map = mapping;

/* Double check that the BO is reserved by this CS */
- if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
+ if (!ww_mutex_is_owned_by(&(*bo)->tbo.resv->lock, &parser->ticket))
return -EINVAL;

if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
--
2.14.1

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

2018-02-23 12:06:38

by Christian König

[permalink] [raw]
Subject: Re: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and swapout.

Am 23.02.2018 um 10:46 schrieb He, Roger:
>
> -----Original Message-----
> From: dri-devel [mailto:[email protected]] On Behalf Of Christian K?nig
> Sent: Tuesday, February 20, 2018 8:58 PM
> To: [email protected]; [email protected]; [email protected]
> Subject: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and swapout.
>
> This solves the problem that when we swapout a BO from a domain we sometimes couldn't make room for it because holding the lock blocks all other BOs with this reservation object.
>
> Signed-off-by: Christian König <[email protected]>
> ---
> drivers/gpu/drm/ttm/ttm_bo.c | 33 ++++++++++++++++-----------------
> 1 file changed, 16 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d90b1cf10b27..3a44c2ee4155 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -713,31 +713,30 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>
> /**
> - * Check the target bo is allowable to be evicted or swapout, including cases:
> - *
> - * a. if share same reservation object with ctx->resv, have assumption
> - * reservation objects should already be locked, so not lock again and
> - * return true directly when either the opreation allow_reserved_eviction
> - * or the target bo already is in delayed free list;
> - *
> - * b. Otherwise, trylock it.
> + * Check if the target bo is allowed to be evicted or swapedout.
> */
> static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> - struct ttm_operation_ctx *ctx, bool *locked)
> + struct ttm_operation_ctx *ctx,
> + bool *locked)
> {
> - bool ret = false;
> + /* First check if we can lock it */
> + *locked = reservation_object_trylock(bo->resv);
> + if (*locked)
> + return true;
>
> - *locked = false;
> + /* Check if it's locked because it is part of the current operation */
> if (bo->resv == ctx->resv) {
> reservation_object_assert_held(bo->resv);
> - if (ctx->allow_reserved_eviction || !list_empty(&bo->ddestroy))
> - ret = true;
> - } else {
> - *locked = reservation_object_trylock(bo->resv);
> - ret = *locked;
> + return ctx->allow_reserved_eviction ||
> + !list_empty(&bo->ddestroy);
> }
>
> - return ret;
> + /* Check if it's locked because it was already evicted */
> + if (ww_mutex_is_owned_by(&bo->resv->lock, NULL))
> + return true;
>
> For the special case: when command submission with Per-VM-BO enabled,
> All BOs a/b/c are always valid BO. After the validation of BOs a and b,
> when validation of BO c, is it possible to return true and then evict BO a and b by mistake ?
> Because a/b/c share same task_struct.

No, that's why I check the context as well. BOs explicitly reserved have
a non NULL context while BOs trylocked for swapout have a NULL context.

Christian.

>
> Thanks
> Roger(Hongbo.He)
>
> + /* Some other thread is using it, don't touch it */
> + return false;
> }
>
> static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> --
> 2.14.1
>
> _______________________________________________
> dri-devel mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/dri-devel


2018-02-24 03:37:54

by He, Roger

[permalink] [raw]
Subject: RE: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and swapout.



-----Original Message-----
From: Christian König [mailto:[email protected]]
Sent: Friday, February 23, 2018 8:06 PM
To: He, Roger <[email protected]>; [email protected]; [email protected]; [email protected]
Subject: Re: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and swapout.

Am 23.02.2018 um 10:46 schrieb He, Roger:
>
> -----Original Message-----
> From: dri-devel [mailto:[email protected]] On
> Behalf Of Christian K?nig
> Sent: Tuesday, February 20, 2018 8:58 PM
> To: [email protected]; [email protected];
> [email protected]
> Subject: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and swapout.
>
> This solves the problem that when we swapout a BO from a domain we sometimes couldn't make room for it because holding the lock blocks all other BOs with this reservation object.
>
> Signed-off-by: Christian König <[email protected]>
> ---
> drivers/gpu/drm/ttm/ttm_bo.c | 33 ++++++++++++++++-----------------
> 1 file changed, 16 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> b/drivers/gpu/drm/ttm/ttm_bo.c index d90b1cf10b27..3a44c2ee4155 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -713,31 +713,30 @@ bool ttm_bo_eviction_valuable(struct
> ttm_buffer_object *bo, EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>
> /**
> - * Check the target bo is allowable to be evicted or swapout, including cases:
> - *
> - * a. if share same reservation object with ctx->resv, have
> assumption
> - * reservation objects should already be locked, so not lock again
> and
> - * return true directly when either the opreation
> allow_reserved_eviction
> - * or the target bo already is in delayed free list;
> - *
> - * b. Otherwise, trylock it.
> + * Check if the target bo is allowed to be evicted or swapedout.
> */
> static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> - struct ttm_operation_ctx *ctx, bool *locked)
> + struct ttm_operation_ctx *ctx,
> + bool *locked)
> {
> - bool ret = false;
> + /* First check if we can lock it */
> + *locked = reservation_object_trylock(bo->resv);
> + if (*locked)
> + return true;
>
> - *locked = false;
> + /* Check if it's locked because it is part of the current operation
> +*/
> if (bo->resv == ctx->resv) {
> reservation_object_assert_held(bo->resv);
> - if (ctx->allow_reserved_eviction || !list_empty(&bo->ddestroy))
> - ret = true;
> - } else {
> - *locked = reservation_object_trylock(bo->resv);
> - ret = *locked;
> + return ctx->allow_reserved_eviction ||
> + !list_empty(&bo->ddestroy);
> }
>
> - return ret;
> + /* Check if it's locked because it was already evicted */
> + if (ww_mutex_is_owned_by(&bo->resv->lock, NULL))
> + return true;
>
> For the special case: when command submission with Per-VM-BO enabled,
> All BOs a/b/c are always valid BO. After the validation of BOs a and
> b, when validation of BO c, is it possible to return true and then evict BO a and b by mistake ?
> Because a/b/c share same task_struct.

No, that's why I check the context as well. BOs explicitly reserved have a non NULL context while BOs trylocked for swapout have a NULL context.

When BOs have a non NULL context only when command submission and reserved by ttm_eu_re6serve_buffers .
But for Per-VM-BO a/b/c they always are not in BO list, so they will be not reserved and have always NULL context.
So above case also can happen. Anything missing here?

Thanks
Roger(Hongbo.He)
>
> + /* Some other thread is using it, don't touch it */
> + return false;
> }
>
> static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> --
> 2.14.1
>
> _______________________________________________
> dri-devel mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

2018-02-24 03:47:06

by He, Roger

[permalink] [raw]
Subject: RE: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and swapout.

I missed the Per-VM-BO share the reservation object with root bo. So context is not NULL here.
So, this patch is:

Reviewed-by: Roger He <[email protected]>

Thanks
Roger(Hongbo.He)
-----Original Message-----
From: Christian König [mailto:[email protected]]
Sent: Friday, February 23, 2018 8:06 PM
To: He, Roger <[email protected]>; [email protected]; [email protected]; [email protected]
Subject: Re: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and swapout.

Am 23.02.2018 um 10:46 schrieb He, Roger:
>
> -----Original Message-----
> From: dri-devel [mailto:[email protected]] On
> Behalf Of Christian K?nig
> Sent: Tuesday, February 20, 2018 8:58 PM
> To: [email protected]; [email protected];
> [email protected]
> Subject: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and swapout.
>
> This solves the problem that when we swapout a BO from a domain we sometimes couldn't make room for it because holding the lock blocks all other BOs with this reservation object.
>
> Signed-off-by: Christian König <[email protected]>
> ---
> drivers/gpu/drm/ttm/ttm_bo.c | 33 ++++++++++++++++-----------------
> 1 file changed, 16 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> b/drivers/gpu/drm/ttm/ttm_bo.c index d90b1cf10b27..3a44c2ee4155 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -713,31 +713,30 @@ bool ttm_bo_eviction_valuable(struct
> ttm_buffer_object *bo, EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>
> /**
> - * Check the target bo is allowable to be evicted or swapout, including cases:
> - *
> - * a. if share same reservation object with ctx->resv, have
> assumption
> - * reservation objects should already be locked, so not lock again
> and
> - * return true directly when either the opreation
> allow_reserved_eviction
> - * or the target bo already is in delayed free list;
> - *
> - * b. Otherwise, trylock it.
> + * Check if the target bo is allowed to be evicted or swapedout.
> */
> static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> - struct ttm_operation_ctx *ctx, bool *locked)
> + struct ttm_operation_ctx *ctx,
> + bool *locked)
> {
> - bool ret = false;
> + /* First check if we can lock it */
> + *locked = reservation_object_trylock(bo->resv);
> + if (*locked)
> + return true;
>
> - *locked = false;
> + /* Check if it's locked because it is part of the current operation
> +*/
> if (bo->resv == ctx->resv) {
> reservation_object_assert_held(bo->resv);
> - if (ctx->allow_reserved_eviction || !list_empty(&bo->ddestroy))
> - ret = true;
> - } else {
> - *locked = reservation_object_trylock(bo->resv);
> - ret = *locked;
> + return ctx->allow_reserved_eviction ||
> + !list_empty(&bo->ddestroy);
> }
>
> - return ret;
> + /* Check if it's locked because it was already evicted */
> + if (ww_mutex_is_owned_by(&bo->resv->lock, NULL))
> + return true;
>
> For the special case: when command submission with Per-VM-BO enabled,
> All BOs a/b/c are always valid BO. After the validation of BOs a and
> b, when validation of BO c, is it possible to return true and then evict BO a and b by mistake ?
> Because a/b/c share same task_struct.

No, that's why I check the context as well. BOs explicitly reserved have a non NULL context while BOs trylocked for swapout have a NULL context.

BOs have a non NULL context only when command submission and reserved by ttm_eu_re6serve_buffers .
But for Per-VM-BO a/b/c they always are not in BO list, so they will be not reserved and have always NULL context.
So above case also can happen. Anything missing here?

>
> + /* Some other thread is using it, don't touch it */
> + return false;
> }
>
> static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> --
> 2.14.1
>
> _______________________________________________
> dri-devel mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

_______________________________________________
dri-devel mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/dri-devel