2023-04-26 09:33:57

by Cai Huoqing

Subject: [PATCH] accel/habanalabs: Make use of rhashtable

Use an rhashtable to accelerate the search for a userptr by address,
instead of iterating over a linked list.

The lookup complexity of a hash table is typically O(1), compared to
O(n) for a list traversal.

This speeds up hl_userptr_is_pinned() by using rhashtable_lookup_fast().

Signed-off-by: Cai Huoqing <[email protected]>
---
.../habanalabs/common/command_submission.c | 16 ++++++---
drivers/accel/habanalabs/common/habanalabs.h | 19 +++++-----
drivers/accel/habanalabs/common/memory.c | 35 +++++++++++++------
drivers/accel/habanalabs/gaudi/gaudi.c | 16 +++++----
drivers/accel/habanalabs/goya/goya.c | 14 +++++---
5 files changed, 66 insertions(+), 34 deletions(-)

diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index af9d2e22c6e7..35c2ab934396 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -312,7 +312,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
parser.job_id = job->id;

parser.hw_queue_id = job->hw_queue_id;
- parser.job_userptr_list = &job->userptr_list;
+ parser.job_userptr_ht = &job->userptr_ht;
parser.patched_cb = NULL;
parser.user_cb = job->user_cb;
parser.user_cb_size = job->user_cb_size;
@@ -351,7 +351,7 @@ static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
struct hl_cs *cs = job->cs;

if (is_cb_patched(hdev, job)) {
- hl_userptr_delete_list(hdev, &job->userptr_list);
+ hl_userptr_delete_list(hdev, &job->userptr_ht);

/*
* We might arrive here from rollback and patched CB wasn't
@@ -1284,6 +1284,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
{
struct hl_cs_job *job;
+ int rc;

job = kzalloc(sizeof(*job), GFP_ATOMIC);
if (!job)
@@ -1296,13 +1297,20 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
job->queue_type = queue_type;
job->is_kernel_allocated_cb = is_kernel_allocated_cb;

- if (is_cb_patched(hdev, job))
- INIT_LIST_HEAD(&job->userptr_list);
+ if (is_cb_patched(hdev, job)) {
+ rc = rhashtable_init(&job->userptr_ht, &hl_userptr_rht_params);
+ if (rc)
+ goto free_job;
+ }

if (job->queue_type == QUEUE_TYPE_EXT)
INIT_WORK(&job->finish_work, job_wq_completion);

return job;
+
+free_job:
+ kfree(job);
+ return NULL;
}

static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index eaae69a9f817..9c876d1480d2 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -19,6 +19,7 @@
#include <linux/dma-direction.h>
#include <linux/scatterlist.h>
#include <linux/hashtable.h>
+#include <linux/rhashtable.h>
#include <linux/debugfs.h>
#include <linux/rwsem.h>
#include <linux/eventfd.h>
@@ -540,6 +541,8 @@ struct hl_hints_range {
u64 end_addr;
};

+extern const struct rhashtable_params hl_userptr_rht_params;
+
/**
* struct asic_fixed_properties - ASIC specific immutable properties.
* @hw_queues_props: H/W queues properties.
@@ -1915,7 +1918,7 @@ struct hl_ctx_mgr {
/**
* struct hl_userptr - memory mapping chunk information
* @vm_type: type of the VM.
- * @job_node: linked-list node for hanging the object on the Job's list.
+ * @job_node: hashtable node for hanging the object on the Job's list.
* @pages: pointer to struct page array
* @npages: size of @pages array
* @sgt: pointer to the scatter-gather table that holds the pages.
@@ -1928,7 +1931,7 @@ struct hl_ctx_mgr {
*/
struct hl_userptr {
enum vm_type vm_type; /* must be first */
- struct list_head job_node;
+ struct rhash_head job_node;
struct page **pages;
unsigned int npages;
struct sg_table *sgt;
@@ -2028,7 +2031,7 @@ struct hl_cs {
* @patched_cb: in case of patching, this is internal CB which is submitted on
* the queue instead of the CB we got from the IOCTL.
* @finish_work: workqueue object to run when job is completed.
- * @userptr_list: linked-list of userptr mappings that belong to this job and
+ * @userptr_ht: hashtable of userptr mappings that belong to this job and
* wait for completion.
* @debugfs_list: node in debugfs list of command submission jobs.
* @refcount: reference counter for usage of the CS job.
@@ -2056,7 +2059,7 @@ struct hl_cs_job {
struct hl_cb *user_cb;
struct hl_cb *patched_cb;
struct work_struct finish_work;
- struct list_head userptr_list;
+ struct rhashtable userptr_ht;
struct list_head debugfs_list;
struct kref refcount;
enum hl_queue_type queue_type;
@@ -2075,7 +2078,7 @@ struct hl_cs_job {
* @user_cb: the CB we got from the user.
* @patched_cb: in case of patching, this is internal CB which is submitted on
* the queue instead of the CB we got from the IOCTL.
- * @job_userptr_list: linked-list of userptr mappings that belong to the related
+ * @job_userptr_ht: hashtable of userptr mappings that belong to the related
* job and wait for completion.
* @cs_sequence: the sequence number of the related CS.
* @queue_type: the type of the H/W queue this job is submitted to.
@@ -2098,7 +2101,7 @@ struct hl_cs_job {
struct hl_cs_parser {
struct hl_cb *user_cb;
struct hl_cb *patched_cb;
- struct list_head *job_userptr_list;
+ struct rhashtable *job_userptr_ht;
u64 cs_sequence;
enum hl_queue_type queue_type;
u32 ctx_id;
@@ -3760,9 +3763,9 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
struct hl_userptr *userptr);
void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
void hl_userptr_delete_list(struct hl_device *hdev,
- struct list_head *userptr_list);
+ struct rhashtable *userptr_ht);
bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
- struct list_head *userptr_list,
+ struct rhashtable *userptr_ht,
struct hl_userptr **userptr);

int hl_mmu_init(struct hl_device *hdev);
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index a7b6a273ce21..e5e7912b3b34 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -23,6 +23,13 @@ MODULE_IMPORT_NS(DMA_BUF);

#define MEM_HANDLE_INVALID ULONG_MAX

+const struct rhashtable_params hl_userptr_rht_params = {
+ .head_offset = offsetof(struct hl_userptr, job_node),
+ .key_offset = offsetof(struct hl_userptr, addr),
+ .key_len = sizeof(u64),
+ .automatic_shrinking = true,
+};
+
static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
struct hl_mem_in *args, u64 *handle);

@@ -2483,7 +2490,6 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
userptr->size = size;
userptr->addr = addr;
userptr->dma_mapped = false;
- INIT_LIST_HEAD(&userptr->job_node);

rc = get_user_memory(hdev, addr, size, npages, start, offset,
userptr);
@@ -2522,8 +2528,6 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
kvfree(userptr->pages);

- list_del(&userptr->job_node);
-
sg_free_table(userptr->sgt);
kfree(userptr->sgt);
}
@@ -2531,23 +2535,31 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
/**
* hl_userptr_delete_list() - clear userptr list.
* @hdev: pointer to the habanalabs device structure.
- * @userptr_list: pointer to the list to clear.
+ * @userptr_ht: pointer to the hashtable to clear.
*
* This function does the following:
* - Iterates over the list and unpins the host memory and frees the userptr
* structure.
*/
void hl_userptr_delete_list(struct hl_device *hdev,
- struct list_head *userptr_list)
+ struct rhashtable *userptr_ht)
{
- struct hl_userptr *userptr, *tmp;
+ struct hl_userptr *userptr;
+ struct rhashtable_iter hti;
+ struct rhash_head *pos;

- list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
+ rhashtable_walk_enter(userptr_ht, &hti);
+ rhashtable_walk_start(&hti);
+ while ((pos = rhashtable_walk_next(&hti))) {
+ if (PTR_ERR(pos) == -EAGAIN)
+ continue;
+ rhashtable_remove_fast(userptr_ht, hti.p, hl_userptr_rht_params);
+ userptr = rhashtable_walk_peek(&hti);
hl_unpin_host_memory(hdev, userptr);
kfree(userptr);
}

- INIT_LIST_HEAD(userptr_list);
+ rhashtable_destroy(userptr_ht);
}

/**
@@ -2555,7 +2567,7 @@ void hl_userptr_delete_list(struct hl_device *hdev,
* @hdev: pointer to the habanalabs device structure.
* @addr: user address to check.
* @size: user block size to check.
- * @userptr_list: pointer to the list to clear.
+ * @userptr_ht: pointer to the hashtable to clear.
* @userptr: pointer to userptr to check.
*
* This function does the following:
@@ -2563,10 +2575,11 @@ void hl_userptr_delete_list(struct hl_device *hdev,
* pinned. If so, returns true, otherwise returns false.
*/
bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
- u32 size, struct list_head *userptr_list,
+ u32 size, struct rhashtable *userptr_ht,
struct hl_userptr **userptr)
{
- list_for_each_entry((*userptr), userptr_list, job_node) {
+ (*userptr) = rhashtable_lookup_fast(userptr_ht, &addr, hl_userptr_rht_params);
+ if (*userptr) {
if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
return true;
}
diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
index a29aa8f7b6f3..1e1433042413 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi.c
@@ -1031,7 +1031,7 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,
}

free_job:
- hl_userptr_delete_list(hdev, &job->userptr_list);
+ hl_userptr_delete_list(hdev, &job->userptr_ht);
hl_debugfs_remove_job(hdev, job);
kfree(job);
atomic_dec(&cb->cs_cnt);
@@ -4901,7 +4901,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
int rc;

if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
- parser->job_userptr_list, &userptr))
+ parser->job_userptr_ht, &userptr))
goto already_pinned;

userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
@@ -4913,7 +4913,10 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
if (rc)
goto free_userptr;

- list_add_tail(&userptr->job_node, parser->job_userptr_list);
+ rc = rhashtable_insert_fast(parser->job_userptr_ht,
+ &userptr->job_node, hl_userptr_rht_params);
+ if (rc)
+ goto unpin_memory;

rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
if (rc) {
@@ -4931,7 +4934,8 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
return 0;

unpin_memory:
- list_del(&userptr->job_node);
+ rhashtable_remove_fast(parser->job_userptr_ht,
+ &userptr->job_node, hl_userptr_rht_params);
hl_unpin_host_memory(hdev, userptr);
free_userptr:
kfree(userptr);
@@ -5175,7 +5179,7 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev,
if ((!skip_host_mem_pin) &&
(!hl_userptr_is_pinned(hdev, addr,
le32_to_cpu(user_dma_pkt->tsize),
- parser->job_userptr_list, &userptr))) {
+ parser->job_userptr_ht, &userptr))) {
dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
addr, user_dma_pkt->tsize);
return -EFAULT;
@@ -5472,7 +5476,7 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,

free_userptr:
if (rc)
- hl_userptr_delete_list(hdev, parser->job_userptr_list);
+ hl_userptr_delete_list(hdev, parser->job_userptr_ht);
return rc;
}

diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c
index fb0ac9df841a..bfcbb9e8b126 100644
--- a/drivers/accel/habanalabs/goya/goya.c
+++ b/drivers/accel/habanalabs/goya/goya.c
@@ -3347,7 +3347,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
int rc;

if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
- parser->job_userptr_list, &userptr))
+ parser->job_userptr_ht, &userptr))
goto already_pinned;

userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
@@ -3359,7 +3359,10 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
if (rc)
goto free_userptr;

- list_add_tail(&userptr->job_node, parser->job_userptr_list);
+ rc = rhashtable_insert_fast(parser->job_userptr_ht,
+ &userptr->job_node, hl_userptr_rht_params);
+ if (rc)
+ goto unpin_memory;

rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
if (rc) {
@@ -3377,7 +3380,8 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
return 0;

unpin_memory:
- list_del(&userptr->job_node);
+ rhashtable_remove_fast(parser->job_userptr_ht,
+ &userptr->job_node, hl_userptr_rht_params);
hl_unpin_host_memory(hdev, userptr);
free_userptr:
kfree(userptr);
@@ -3806,7 +3810,7 @@ static int goya_patch_dma_packet(struct hl_device *hdev,
if ((!skip_host_mem_pin) &&
(hl_userptr_is_pinned(hdev, addr,
le32_to_cpu(user_dma_pkt->tsize),
- parser->job_userptr_list, &userptr) == false)) {
+ parser->job_userptr_ht, &userptr) == false)) {
dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
addr, user_dma_pkt->tsize);
return -EFAULT;
@@ -4104,7 +4108,7 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,

free_userptr:
if (rc)
- hl_userptr_delete_list(hdev, parser->job_userptr_list);
+ hl_userptr_delete_list(hdev, parser->job_userptr_ht);
return rc;
}

--
2.34.1


2023-04-26 11:28:17

by Cai Huoqing

Subject: Re: [PATCH] accel/habanalabs: Make use of rhashtable

On 26 Apr 23 17:28:02, Cai Huoqing wrote:
> Use an rhashtable to accelerate the search for a userptr by address,
> instead of iterating over a linked list.
>
> The lookup complexity of a hash table is typically O(1), compared to
> O(n) for a list traversal.
>
> This speeds up hl_userptr_is_pinned() by using rhashtable_lookup_fast().
>
> Signed-off-by: Cai Huoqing <[email protected]>
> ---
> .../habanalabs/common/command_submission.c | 16 ++++++---
> drivers/accel/habanalabs/common/habanalabs.h | 19 +++++-----
> drivers/accel/habanalabs/common/memory.c | 35 +++++++++++++------
> drivers/accel/habanalabs/gaudi/gaudi.c | 16 +++++----
> drivers/accel/habanalabs/goya/goya.c | 14 +++++---
> 5 files changed, 66 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
> index af9d2e22c6e7..35c2ab934396 100644
> --- a/drivers/accel/habanalabs/common/command_submission.c
> +++ b/drivers/accel/habanalabs/common/command_submission.c
> @@ -312,7 +312,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
> parser.job_id = job->id;
>
> parser.hw_queue_id = job->hw_queue_id;
> - parser.job_userptr_list = &job->userptr_list;
> + parser.job_userptr_ht = &job->userptr_ht;
> parser.patched_cb = NULL;
> parser.user_cb = job->user_cb;
> parser.user_cb_size = job->user_cb_size;
> @@ -351,7 +351,7 @@ static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
> struct hl_cs *cs = job->cs;
>
> if (is_cb_patched(hdev, job)) {
> - hl_userptr_delete_list(hdev, &job->userptr_list);
> + hl_userptr_delete_list(hdev, &job->userptr_ht);
>
> /*
> * We might arrive here from rollback and patched CB wasn't
> @@ -1284,6 +1284,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
> enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
> {
> struct hl_cs_job *job;
> + int rc;
>
> job = kzalloc(sizeof(*job), GFP_ATOMIC);
> if (!job)
> @@ -1296,13 +1297,20 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
> job->queue_type = queue_type;
> job->is_kernel_allocated_cb = is_kernel_allocated_cb;
>
> - if (is_cb_patched(hdev, job))
> - INIT_LIST_HEAD(&job->userptr_list);
> + if (is_cb_patched(hdev, job)) {
> + rc = rhashtable_init(&job->userptr_ht, &hl_userptr_rht_params);
> + if (rc)
> + goto free_job;
> + }
>
> if (job->queue_type == QUEUE_TYPE_EXT)
> INIT_WORK(&job->finish_work, job_wq_completion);
>
> return job;
> +
> +free_job:
> + kfree(job);
> + return NULL;
> }
>
> static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
> diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
> index eaae69a9f817..9c876d1480d2 100644
> --- a/drivers/accel/habanalabs/common/habanalabs.h
> +++ b/drivers/accel/habanalabs/common/habanalabs.h
> @@ -19,6 +19,7 @@
> #include <linux/dma-direction.h>
> #include <linux/scatterlist.h>
> #include <linux/hashtable.h>
> +#include <linux/rhashtable.h>
> #include <linux/debugfs.h>
> #include <linux/rwsem.h>
> #include <linux/eventfd.h>
> @@ -540,6 +541,8 @@ struct hl_hints_range {
> u64 end_addr;
> };
>
> +extern const struct rhashtable_params hl_userptr_rht_params;
> +
> /**
> * struct asic_fixed_properties - ASIC specific immutable properties.
> * @hw_queues_props: H/W queues properties.
> @@ -1915,7 +1918,7 @@ struct hl_ctx_mgr {
> /**
> * struct hl_userptr - memory mapping chunk information
> * @vm_type: type of the VM.
> - * @job_node: linked-list node for hanging the object on the Job's list.
> + * @job_node: hashtable node for hanging the object on the Job's list.
> * @pages: pointer to struct page array
> * @npages: size of @pages array
> * @sgt: pointer to the scatter-gather table that holds the pages.
> @@ -1928,7 +1931,7 @@ struct hl_ctx_mgr {
> */
> struct hl_userptr {
> enum vm_type vm_type; /* must be first */
> - struct list_head job_node;
> + struct rhash_head job_node;
> struct page **pages;
> unsigned int npages;
> struct sg_table *sgt;
> @@ -2028,7 +2031,7 @@ struct hl_cs {
> * @patched_cb: in case of patching, this is internal CB which is submitted on
> * the queue instead of the CB we got from the IOCTL.
> * @finish_work: workqueue object to run when job is completed.
> - * @userptr_list: linked-list of userptr mappings that belong to this job and
> + * @userptr_ht: hashtable of userptr mappings that belong to this job and
> * wait for completion.
> * @debugfs_list: node in debugfs list of command submission jobs.
> * @refcount: reference counter for usage of the CS job.
> @@ -2056,7 +2059,7 @@ struct hl_cs_job {
> struct hl_cb *user_cb;
> struct hl_cb *patched_cb;
> struct work_struct finish_work;
> - struct list_head userptr_list;
> + struct rhashtable userptr_ht;
> struct list_head debugfs_list;
> struct kref refcount;
> enum hl_queue_type queue_type;
> @@ -2075,7 +2078,7 @@ struct hl_cs_job {
> * @user_cb: the CB we got from the user.
> * @patched_cb: in case of patching, this is internal CB which is submitted on
> * the queue instead of the CB we got from the IOCTL.
> - * @job_userptr_list: linked-list of userptr mappings that belong to the related
> + * @job_userptr_ht: hashtable of userptr mappings that belong to the related
> * job and wait for completion.
> * @cs_sequence: the sequence number of the related CS.
> * @queue_type: the type of the H/W queue this job is submitted to.
> @@ -2098,7 +2101,7 @@ struct hl_cs_job {
> struct hl_cs_parser {
> struct hl_cb *user_cb;
> struct hl_cb *patched_cb;
> - struct list_head *job_userptr_list;
> + struct rhashtable *job_userptr_ht;
> u64 cs_sequence;
> enum hl_queue_type queue_type;
> u32 ctx_id;
> @@ -3760,9 +3763,9 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
> struct hl_userptr *userptr);
> void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
> void hl_userptr_delete_list(struct hl_device *hdev,
> - struct list_head *userptr_list);
> + struct rhashtable *userptr_ht);
> bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
> - struct list_head *userptr_list,
> + struct rhashtable *userptr_ht,
> struct hl_userptr **userptr);
>
> int hl_mmu_init(struct hl_device *hdev);
> diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
> index a7b6a273ce21..e5e7912b3b34 100644
> --- a/drivers/accel/habanalabs/common/memory.c
> +++ b/drivers/accel/habanalabs/common/memory.c
> @@ -23,6 +23,13 @@ MODULE_IMPORT_NS(DMA_BUF);
>
> #define MEM_HANDLE_INVALID ULONG_MAX
>
> +const struct rhashtable_params hl_userptr_rht_params = {
> + .head_offset = offsetof(struct hl_userptr, job_node),
> + .key_offset = offsetof(struct hl_userptr, addr),
> + .key_len = sizeof(u64),
> + .automatic_shrinking = true,
> +};
> +
> static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
> struct hl_mem_in *args, u64 *handle);
>
> @@ -2483,7 +2490,6 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
> userptr->size = size;
> userptr->addr = addr;
> userptr->dma_mapped = false;
> - INIT_LIST_HEAD(&userptr->job_node);
>
> rc = get_user_memory(hdev, addr, size, npages, start, offset,
> userptr);
> @@ -2522,8 +2528,6 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
> unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
> kvfree(userptr->pages);
>
> - list_del(&userptr->job_node);
> -
> sg_free_table(userptr->sgt);
> kfree(userptr->sgt);
> }
> @@ -2531,23 +2535,31 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
> /**
> * hl_userptr_delete_list() - clear userptr list.
> * @hdev: pointer to the habanalabs device structure.
> - * @userptr_list: pointer to the list to clear.
> + * @userptr_ht: pointer to the hashtable to clear.
> *
> * This function does the following:
> * - Iterates over the list and unpins the host memory and frees the userptr
> * structure.
> */
> void hl_userptr_delete_list(struct hl_device *hdev,
> - struct list_head *userptr_list)
> + struct rhashtable *userptr_ht)
> {
> - struct hl_userptr *userptr, *tmp;
> + struct hl_userptr *userptr;
> + struct rhashtable_iter hti;
> + struct rhash_head *pos;
>
> - list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
> + rhashtable_walk_enter(userptr_ht, &hti);
> + rhashtable_walk_start(&hti);
> + while ((pos = rhashtable_walk_next(&hti))) {

rhashtable_walk_next() seems unreliable here, so I will revert this part,
keep 'userptr_list', and clear it with list_for_each_entry_safe().
A v2 patch will follow.
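
For reference, here is a minimal sketch of that list-based teardown pattern.
The entry type and function name are hypothetical stand-ins, not the driver
code; the _safe iterator is what makes it legal to free each entry while
walking the list:

#include <linux/list.h>
#include <linux/slab.h>

/* Hypothetical stand-in for struct hl_userptr. */
struct example_entry {
	struct list_head job_node;
};

/* Unlink and free every entry; safe because the next pointer is cached. */
static void example_delete_list(struct list_head *userptr_list)
{
	struct example_entry *e, *tmp;

	list_for_each_entry_safe(e, tmp, userptr_list, job_node) {
		list_del(&e->job_node);
		kfree(e);
	}
}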

Cai-
Thanks
> + if (PTR_ERR(pos) == -EAGAIN)
> + continue;
> + rhashtable_remove_fast(userptr_ht, hti.p, hl_userptr_rht_params);
> + userptr = rhashtable_walk_peek(&hti);
> hl_unpin_host_memory(hdev, userptr);
> kfree(userptr);
> }
>
> - INIT_LIST_HEAD(userptr_list);
> + rhashtable_destroy(userptr_ht);
> }
>
> /**
> @@ -2555,7 +2567,7 @@ void hl_userptr_delete_list(struct hl_device *hdev,
> * @hdev: pointer to the habanalabs device structure.
> * @addr: user address to check.
> * @size: user block size to check.
> - * @userptr_list: pointer to the list to clear.
> + * @userptr_ht: pointer to the hashtable to clear.
> * @userptr: pointer to userptr to check.
> *
> * This function does the following:
> @@ -2563,10 +2575,11 @@ void hl_userptr_delete_list(struct hl_device *hdev,
> * pinned. If so, returns true, otherwise returns false.
> */
> bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
> - u32 size, struct list_head *userptr_list,
> + u32 size, struct rhashtable *userptr_ht,
> struct hl_userptr **userptr)
> {
> - list_for_each_entry((*userptr), userptr_list, job_node) {
> + (*userptr) = rhashtable_lookup_fast(userptr_ht, &addr, hl_userptr_rht_params);
> + if (*userptr) {
> if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
> return true;
> }
> diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
> index a29aa8f7b6f3..1e1433042413 100644
> --- a/drivers/accel/habanalabs/gaudi/gaudi.c
> +++ b/drivers/accel/habanalabs/gaudi/gaudi.c
> @@ -1031,7 +1031,7 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,
> }
>
> free_job:
> - hl_userptr_delete_list(hdev, &job->userptr_list);
> + hl_userptr_delete_list(hdev, &job->userptr_ht);
> hl_debugfs_remove_job(hdev, job);
> kfree(job);
> atomic_dec(&cb->cs_cnt);
> @@ -4901,7 +4901,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
> int rc;
>
> if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
> - parser->job_userptr_list, &userptr))
> + parser->job_userptr_ht, &userptr))
> goto already_pinned;
>
> userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
> @@ -4913,7 +4913,10 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
> if (rc)
> goto free_userptr;
>
> - list_add_tail(&userptr->job_node, parser->job_userptr_list);
> + rc = rhashtable_insert_fast(parser->job_userptr_ht,
> + &userptr->job_node, hl_userptr_rht_params);
> + if (rc)
> + goto unpin_memory;
>
> rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
> if (rc) {
> @@ -4931,7 +4934,8 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
> return 0;
>
> unpin_memory:
> - list_del(&userptr->job_node);
> + rhashtable_remove_fast(parser->job_userptr_ht,
> + &userptr->job_node, hl_userptr_rht_params);
> hl_unpin_host_memory(hdev, userptr);
> free_userptr:
> kfree(userptr);
> @@ -5175,7 +5179,7 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev,
> if ((!skip_host_mem_pin) &&
> (!hl_userptr_is_pinned(hdev, addr,
> le32_to_cpu(user_dma_pkt->tsize),
> - parser->job_userptr_list, &userptr))) {
> + parser->job_userptr_ht, &userptr))) {
> dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
> addr, user_dma_pkt->tsize);
> return -EFAULT;
> @@ -5472,7 +5476,7 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
>
> free_userptr:
> if (rc)
> - hl_userptr_delete_list(hdev, parser->job_userptr_list);
> + hl_userptr_delete_list(hdev, parser->job_userptr_ht);
> return rc;
> }
>
> diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c
> index fb0ac9df841a..bfcbb9e8b126 100644
> --- a/drivers/accel/habanalabs/goya/goya.c
> +++ b/drivers/accel/habanalabs/goya/goya.c
> @@ -3347,7 +3347,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
> int rc;
>
> if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
> - parser->job_userptr_list, &userptr))
> + parser->job_userptr_ht, &userptr))
> goto already_pinned;
>
> userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
> @@ -3359,7 +3359,10 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
> if (rc)
> goto free_userptr;
>
> - list_add_tail(&userptr->job_node, parser->job_userptr_list);
> + rc = rhashtable_insert_fast(parser->job_userptr_ht,
> + &userptr->job_node, hl_userptr_rht_params);
> + if (rc)
> + goto unpin_memory;
>
> rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
> if (rc) {
> @@ -3377,7 +3380,8 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
> return 0;
>
> unpin_memory:
> - list_del(&userptr->job_node);
> + rhashtable_remove_fast(parser->job_userptr_ht,
> + &userptr->job_node, hl_userptr_rht_params);
> hl_unpin_host_memory(hdev, userptr);
> free_userptr:
> kfree(userptr);
> @@ -3806,7 +3810,7 @@ static int goya_patch_dma_packet(struct hl_device *hdev,
> if ((!skip_host_mem_pin) &&
> (hl_userptr_is_pinned(hdev, addr,
> le32_to_cpu(user_dma_pkt->tsize),
> - parser->job_userptr_list, &userptr) == false)) {
> + parser->job_userptr_ht, &userptr) == false)) {
> dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
> addr, user_dma_pkt->tsize);
> return -EFAULT;
> @@ -4104,7 +4108,7 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
>
> free_userptr:
> if (rc)
> - hl_userptr_delete_list(hdev, parser->job_userptr_list);
> + hl_userptr_delete_list(hdev, parser->job_userptr_ht);
> return rc;
> }
>
> --
> 2.34.1
>

2023-04-28 15:07:31

by Cai Huoqing

Subject: Re: [PATCH] accel/habanalabs: Make use of rhashtable

On 26 Apr 23 19:22:43, Cai Huoqing wrote:
> On 26 Apr 23 17:28:02, Cai Huoqing wrote:
> > Use an rhashtable to accelerate the search for a userptr by address,
> > instead of iterating over a linked list.
> >
> > The lookup complexity of a hash table is typically O(1), compared to
> > O(n) for a list traversal.
> >
> > This speeds up hl_userptr_is_pinned() by using rhashtable_lookup_fast().
> >
> > Signed-off-by: Cai Huoqing <[email protected]>
> > ---
> > .../habanalabs/common/command_submission.c | 16 ++++++---
> > drivers/accel/habanalabs/common/habanalabs.h | 19 +++++-----
> > drivers/accel/habanalabs/common/memory.c | 35 +++++++++++++------
> > drivers/accel/habanalabs/gaudi/gaudi.c | 16 +++++----
> > drivers/accel/habanalabs/goya/goya.c | 14 +++++---
> > 5 files changed, 66 insertions(+), 34 deletions(-)
> >
> > diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
> > index af9d2e22c6e7..35c2ab934396 100644
> > --- a/drivers/accel/habanalabs/common/command_submission.c
> > +++ b/drivers/accel/habanalabs/common/command_submission.c
> > @@ -312,7 +312,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
> > parser.job_id = job->id;
> >
> > parser.hw_queue_id = job->hw_queue_id;
> > - parser.job_userptr_list = &job->userptr_list;
> > + parser.job_userptr_ht = &job->userptr_ht;
> > parser.patched_cb = NULL;
> > parser.user_cb = job->user_cb;
> > parser.user_cb_size = job->user_cb_size;
> > @@ -351,7 +351,7 @@ static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
> > struct hl_cs *cs = job->cs;
> >
> > if (is_cb_patched(hdev, job)) {
> > - hl_userptr_delete_list(hdev, &job->userptr_list);
> > + hl_userptr_delete_list(hdev, &job->userptr_ht);
> >
> > /*
> > * We might arrive here from rollback and patched CB wasn't
> > @@ -1284,6 +1284,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
> > enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
> > {
> > struct hl_cs_job *job;
> > + int rc;
> >
> > job = kzalloc(sizeof(*job), GFP_ATOMIC);
> > if (!job)
> > @@ -1296,13 +1297,20 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
> > job->queue_type = queue_type;
> > job->is_kernel_allocated_cb = is_kernel_allocated_cb;
> >
> > - if (is_cb_patched(hdev, job))
> > - INIT_LIST_HEAD(&job->userptr_list);
> > + if (is_cb_patched(hdev, job)) {
> > + rc = rhashtable_init(&job->userptr_ht, &hl_userptr_rht_params);
> > + if (rc)
> > + goto free_job;
> > + }
> >
> > if (job->queue_type == QUEUE_TYPE_EXT)
> > INIT_WORK(&job->finish_work, job_wq_completion);
> >
> > return job;
> > +
> > +free_job:
> > + kfree(job);
> > + return NULL;
> > }
> >
> > static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
> > diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
> > index eaae69a9f817..9c876d1480d2 100644
> > --- a/drivers/accel/habanalabs/common/habanalabs.h
> > +++ b/drivers/accel/habanalabs/common/habanalabs.h
> > @@ -19,6 +19,7 @@
> > #include <linux/dma-direction.h>
> > #include <linux/scatterlist.h>
> > #include <linux/hashtable.h>
> > +#include <linux/rhashtable.h>
> > #include <linux/debugfs.h>
> > #include <linux/rwsem.h>
> > #include <linux/eventfd.h>
> > @@ -540,6 +541,8 @@ struct hl_hints_range {
> > u64 end_addr;
> > };
> >
> > +extern const struct rhashtable_params hl_userptr_rht_params;
> > +
> > /**
> > * struct asic_fixed_properties - ASIC specific immutable properties.
> > * @hw_queues_props: H/W queues properties.
> > @@ -1915,7 +1918,7 @@ struct hl_ctx_mgr {
> > /**
> > * struct hl_userptr - memory mapping chunk information
> > * @vm_type: type of the VM.
> > - * @job_node: linked-list node for hanging the object on the Job's list.
> > + * @job_node: hashtable node for hanging the object on the Job's list.
> > * @pages: pointer to struct page array
> > * @npages: size of @pages array
> > * @sgt: pointer to the scatter-gather table that holds the pages.
> > @@ -1928,7 +1931,7 @@ struct hl_ctx_mgr {
> > */
> > struct hl_userptr {
> > enum vm_type vm_type; /* must be first */
> > - struct list_head job_node;
> > + struct rhash_head job_node;
> > struct page **pages;
> > unsigned int npages;
> > struct sg_table *sgt;
> > @@ -2028,7 +2031,7 @@ struct hl_cs {
> > * @patched_cb: in case of patching, this is internal CB which is submitted on
> > * the queue instead of the CB we got from the IOCTL.
> > * @finish_work: workqueue object to run when job is completed.
> > - * @userptr_list: linked-list of userptr mappings that belong to this job and
> > + * @userptr_ht: hashtable of userptr mappings that belong to this job and
> > * wait for completion.
> > * @debugfs_list: node in debugfs list of command submission jobs.
> > * @refcount: reference counter for usage of the CS job.
> > @@ -2056,7 +2059,7 @@ struct hl_cs_job {
> > struct hl_cb *user_cb;
> > struct hl_cb *patched_cb;
> > struct work_struct finish_work;
> > - struct list_head userptr_list;
> > + struct rhashtable userptr_ht;
> > struct list_head debugfs_list;
> > struct kref refcount;
> > enum hl_queue_type queue_type;
> > @@ -2075,7 +2078,7 @@ struct hl_cs_job {
> > * @user_cb: the CB we got from the user.
> > * @patched_cb: in case of patching, this is internal CB which is submitted on
> > * the queue instead of the CB we got from the IOCTL.
> > - * @job_userptr_list: linked-list of userptr mappings that belong to the related
> > + * @job_userptr_ht: hashtable of userptr mappings that belong to the related
> > * job and wait for completion.
> > * @cs_sequence: the sequence number of the related CS.
> > * @queue_type: the type of the H/W queue this job is submitted to.
> > @@ -2098,7 +2101,7 @@ struct hl_cs_job {
> > struct hl_cs_parser {
> > struct hl_cb *user_cb;
> > struct hl_cb *patched_cb;
> > - struct list_head *job_userptr_list;
> > + struct rhashtable *job_userptr_ht;
> > u64 cs_sequence;
> > enum hl_queue_type queue_type;
> > u32 ctx_id;
> > @@ -3760,9 +3763,9 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
> > struct hl_userptr *userptr);
> > void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
> > void hl_userptr_delete_list(struct hl_device *hdev,
> > - struct list_head *userptr_list);
> > + struct rhashtable *userptr_ht);
> > bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
> > - struct list_head *userptr_list,
> > + struct rhashtable *userptr_ht,
> > struct hl_userptr **userptr);
> >
> > int hl_mmu_init(struct hl_device *hdev);
> > diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
> > index a7b6a273ce21..e5e7912b3b34 100644
> > --- a/drivers/accel/habanalabs/common/memory.c
> > +++ b/drivers/accel/habanalabs/common/memory.c
> > @@ -23,6 +23,13 @@ MODULE_IMPORT_NS(DMA_BUF);
> >
> > #define MEM_HANDLE_INVALID ULONG_MAX
> >
> > +const struct rhashtable_params hl_userptr_rht_params = {
> > + .head_offset = offsetof(struct hl_userptr, job_node),
> > + .key_offset = offsetof(struct hl_userptr, addr),
> > + .key_len = sizeof(u64),
> > + .automatic_shrinking = true,
> > +};
> > +
> > static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
> > struct hl_mem_in *args, u64 *handle);
> >
> > @@ -2483,7 +2490,6 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
> > userptr->size = size;
> > userptr->addr = addr;
> > userptr->dma_mapped = false;
> > - INIT_LIST_HEAD(&userptr->job_node);
> >
> > rc = get_user_memory(hdev, addr, size, npages, start, offset,
> > userptr);
> > @@ -2522,8 +2528,6 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
> > unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
> > kvfree(userptr->pages);
> >
> > - list_del(&userptr->job_node);
> > -
> > sg_free_table(userptr->sgt);
> > kfree(userptr->sgt);
> > }
> > @@ -2531,23 +2535,31 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
> > /**
> > * hl_userptr_delete_list() - clear userptr list.
> > * @hdev: pointer to the habanalabs device structure.
> > - * @userptr_list: pointer to the list to clear.
> > + * @userptr_ht: pointer to the hashtable to clear.
> > *
> > * This function does the following:
> > * - Iterates over the list and unpins the host memory and frees the userptr
> > * structure.
> > */
> > void hl_userptr_delete_list(struct hl_device *hdev,
> > - struct list_head *userptr_list)
> > + struct rhashtable *userptr_ht)
> > {
> > - struct hl_userptr *userptr, *tmp;
> > + struct hl_userptr *userptr;
> > + struct rhashtable_iter hti;
> > + struct rhash_head *pos;
> >
> > - list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
> > + rhashtable_walk_enter(userptr_ht, &hti);
> > + rhashtable_walk_start(&hti);
> > + while ((pos = rhashtable_walk_next(&hti))) {
>
> rhashtable_walk_next() seems unreliable here, so I will revert this part,
> keep 'userptr_list', and clear it with list_for_each_entry_safe().
> A v2 patch will follow.
>
> Cai-
> Thanks
rhashtable_free_and_destroy() can be used here instead of the open-coded walk.
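
For illustration, a minimal sketch of how rhashtable_free_and_destroy() could
replace the open-coded walk above, reusing hl_unpin_host_memory() from this
patch; the example_* names are hypothetical, not part of the driver:

#include <linux/rhashtable.h>
#include <linux/slab.h>

/* Callback invoked once for every entry still left in the table. */
static void example_free_userptr(void *ptr, void *arg)
{
	struct hl_userptr *userptr = ptr;
	struct hl_device *hdev = arg;

	hl_unpin_host_memory(hdev, userptr);
	kfree(userptr);
}

/* Tears down the per-job hashtable in one call, no manual iterator needed. */
static void example_userptr_delete_ht(struct hl_device *hdev,
				      struct rhashtable *userptr_ht)
{
	rhashtable_free_and_destroy(userptr_ht, example_free_userptr, hdev);
}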

I have sent the v2 patch:

https://lore.kernel.org/lkml/[email protected]/

Thanks,
Cai-

> > + if (PTR_ERR(pos) == -EAGAIN)
> > + continue;
> > + rhashtable_remove_fast(userptr_ht, hti.p, hl_userptr_rht_params);
> > + userptr = rhashtable_walk_peek(&hti);
> > hl_unpin_host_memory(hdev, userptr);
> > kfree(userptr);
> > }
> >
> > - INIT_LIST_HEAD(userptr_list);
> > + rhashtable_destroy(userptr_ht);
> > }
> >
> > /**
> > @@ -2555,7 +2567,7 @@ void hl_userptr_delete_list(struct hl_device *hdev,
> > * @hdev: pointer to the habanalabs device structure.
> > * @addr: user address to check.
> > * @size: user block size to check.
> > - * @userptr_list: pointer to the list to clear.
> > + * @userptr_ht: pointer to the hashtable to clear.
> > * @userptr: pointer to userptr to check.
> > *
> > * This function does the following:
> > @@ -2563,10 +2575,11 @@ void hl_userptr_delete_list(struct hl_device *hdev,
> > * pinned. If so, returns true, otherwise returns false.
> > */
> > bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
> > - u32 size, struct list_head *userptr_list,
> > + u32 size, struct rhashtable *userptr_ht,
> > struct hl_userptr **userptr)
> > {
> > - list_for_each_entry((*userptr), userptr_list, job_node) {
> > + (*userptr) = rhashtable_lookup_fast(userptr_ht, &addr, hl_userptr_rht_params);
> > + if (*userptr) {
> > if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
> > return true;
> > }
> > diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
> > index a29aa8f7b6f3..1e1433042413 100644
> > --- a/drivers/accel/habanalabs/gaudi/gaudi.c
> > +++ b/drivers/accel/habanalabs/gaudi/gaudi.c
> > @@ -1031,7 +1031,7 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,
> > }
> >
> > free_job:
> > - hl_userptr_delete_list(hdev, &job->userptr_list);
> > + hl_userptr_delete_list(hdev, &job->userptr_ht);
> > hl_debugfs_remove_job(hdev, job);
> > kfree(job);
> > atomic_dec(&cb->cs_cnt);
> > @@ -4901,7 +4901,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
> > int rc;
> >
> > if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
> > - parser->job_userptr_list, &userptr))
> > + parser->job_userptr_ht, &userptr))
> > goto already_pinned;
> >
> > userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
> > @@ -4913,7 +4913,10 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
> > if (rc)
> > goto free_userptr;
> >
> > - list_add_tail(&userptr->job_node, parser->job_userptr_list);
> > + rc = rhashtable_insert_fast(parser->job_userptr_ht,
> > + &userptr->job_node, hl_userptr_rht_params);
> > + if (rc)
> > + goto unpin_memory;
> >
> > rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
> > if (rc) {
> > @@ -4931,7 +4934,8 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
> > return 0;
> >
> > unpin_memory:
> > - list_del(&userptr->job_node);
> > + rhashtable_remove_fast(parser->job_userptr_ht,
> > + &userptr->job_node, hl_userptr_rht_params);
> > hl_unpin_host_memory(hdev, userptr);
> > free_userptr:
> > kfree(userptr);
> > @@ -5175,7 +5179,7 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev,
> > if ((!skip_host_mem_pin) &&
> > (!hl_userptr_is_pinned(hdev, addr,
> > le32_to_cpu(user_dma_pkt->tsize),
> > - parser->job_userptr_list, &userptr))) {
> > + parser->job_userptr_ht, &userptr))) {
> > dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
> > addr, user_dma_pkt->tsize);
> > return -EFAULT;
> > @@ -5472,7 +5476,7 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
> >
> > free_userptr:
> > if (rc)
> > - hl_userptr_delete_list(hdev, parser->job_userptr_list);
> > + hl_userptr_delete_list(hdev, parser->job_userptr_ht);
> > return rc;
> > }
> >
> > diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c
> > index fb0ac9df841a..bfcbb9e8b126 100644
> > --- a/drivers/accel/habanalabs/goya/goya.c
> > +++ b/drivers/accel/habanalabs/goya/goya.c
> > @@ -3347,7 +3347,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
> > int rc;
> >
> > if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
> > - parser->job_userptr_list, &userptr))
> > + parser->job_userptr_ht, &userptr))
> > goto already_pinned;
> >
> > userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
> > @@ -3359,7 +3359,10 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
> > if (rc)
> > goto free_userptr;
> >
> > - list_add_tail(&userptr->job_node, parser->job_userptr_list);
> > + rc = rhashtable_insert_fast(parser->job_userptr_ht,
> > + &userptr->job_node, hl_userptr_rht_params);
> > + if (rc)
> > + goto unpin_memory;
> >
> > rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
> > if (rc) {
> > @@ -3377,7 +3380,8 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
> > return 0;
> >
> > unpin_memory:
> > - list_del(&userptr->job_node);
> > + rhashtable_remove_fast(parser->job_userptr_ht,
> > + &userptr->job_node, hl_userptr_rht_params);
> > hl_unpin_host_memory(hdev, userptr);
> > free_userptr:
> > kfree(userptr);
> > @@ -3806,7 +3810,7 @@ static int goya_patch_dma_packet(struct hl_device *hdev,
> > if ((!skip_host_mem_pin) &&
> > (hl_userptr_is_pinned(hdev, addr,
> > le32_to_cpu(user_dma_pkt->tsize),
> > - parser->job_userptr_list, &userptr) == false)) {
> > + parser->job_userptr_ht, &userptr) == false)) {
> > dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
> > addr, user_dma_pkt->tsize);
> > return -EFAULT;
> > @@ -4104,7 +4108,7 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
> >
> > free_userptr:
> > if (rc)
> > - hl_userptr_delete_list(hdev, parser->job_userptr_list);
> > + hl_userptr_delete_list(hdev, parser->job_userptr_ht);
> > return rc;
> > }
> >
> > --
> > 2.34.1
> >