[PATCH v2] accel/habanalabs: Make use of rhashtable

Cai Huoqing cai.huoqing at linux.dev
Fri Apr 28 14:48:54 UTC 2023


Use an rhashtable to accelerate the search for a userptr by address,
instead of walking a linked list.

The average lookup complexity of a hash table is O(1), compared with
O(n) for a list traversal.

This speeds up hl_userptr_is_pinned() by replacing the list walk with
rhashtable_lookup_fast().
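
For reference, the lookup pattern this relies on is roughly the minimal
sketch below (illustrative only; "example_obj", "example_params" and
"example_find" are placeholder names, the real parameters are defined in
memory.c in the diff):

	#include <linux/rhashtable.h>

	/* Entries are keyed by their 64-bit user address. */
	struct example_obj {
		u64			addr;	/* lookup key */
		struct rhash_head	node;	/* hashtable linkage */
	};

	static const struct rhashtable_params example_params = {
		.key_offset		= offsetof(struct example_obj, addr),
		.key_len		= sizeof(u64),
		.head_offset		= offsetof(struct example_obj, node),
		.automatic_shrinking	= true,
	};

	static struct example_obj *example_find(struct rhashtable *ht, u64 addr)
	{
		/* Average O(1); rhashtable_lookup_fast() takes the RCU read
		 * lock internally, so no extra locking is needed here.
		 */
		return rhashtable_lookup_fast(ht, &addr, example_params);
	}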

Signed-off-by: Cai Huoqing <cai.huoqing at linux.dev>
---
v1->v2:
Use rhashtable_free_and_destroy() in hl_userptr_delete_list().
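
The v2 teardown follows the usual free-and-destroy idiom, roughly the
sketch below (placeholder names again; the driver passes hdev as the
callback argument and unpins the host memory inside the callback):

	/* Called once per remaining entry while the table is torn down. */
	static void example_free_cb(void *ptr, void *arg)
	{
		struct example_obj *obj = ptr;

		/* per-entry cleanup (unpin, dma unmap, ...) goes here */
		kfree(obj);
	}

	static void example_destroy(struct rhashtable *ht, void *arg)
	{
		/* Walks all entries, invokes the callback on each, then frees
		 * the bucket array. No concurrent insertions are allowed.
		 */
		rhashtable_free_and_destroy(ht, example_free_cb, arg);
	}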

 .../habanalabs/common/command_submission.c    | 16 ++++++--
 drivers/accel/habanalabs/common/habanalabs.h  | 19 +++++----
 drivers/accel/habanalabs/common/memory.c      | 39 +++++++++++--------
 drivers/accel/habanalabs/gaudi/gaudi.c        | 16 +++++---
 drivers/accel/habanalabs/goya/goya.c          | 14 ++++---
 5 files changed, 65 insertions(+), 39 deletions(-)

diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index af9d2e22c6e7..35c2ab934396 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -312,7 +312,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
 	parser.job_id = job->id;
 
 	parser.hw_queue_id = job->hw_queue_id;
-	parser.job_userptr_list = &job->userptr_list;
+	parser.job_userptr_ht = &job->userptr_ht;
 	parser.patched_cb = NULL;
 	parser.user_cb = job->user_cb;
 	parser.user_cb_size = job->user_cb_size;
@@ -351,7 +351,7 @@ static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
 	struct hl_cs *cs = job->cs;
 
 	if (is_cb_patched(hdev, job)) {
-		hl_userptr_delete_list(hdev, &job->userptr_list);
+		hl_userptr_delete_list(hdev, &job->userptr_ht);
 
 		/*
 		 * We might arrive here from rollback and patched CB wasn't
@@ -1284,6 +1284,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
 {
 	struct hl_cs_job *job;
+	int rc;
 
 	job = kzalloc(sizeof(*job), GFP_ATOMIC);
 	if (!job)
@@ -1296,13 +1297,20 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 	job->queue_type = queue_type;
 	job->is_kernel_allocated_cb = is_kernel_allocated_cb;
 
-	if (is_cb_patched(hdev, job))
-		INIT_LIST_HEAD(&job->userptr_list);
+	if (is_cb_patched(hdev, job)) {
+		rc = rhashtable_init(&job->userptr_ht, &hl_userptr_rht_params);
+		if (rc)
+			goto free_job;
+	}
 
 	if (job->queue_type == QUEUE_TYPE_EXT)
 		INIT_WORK(&job->finish_work, job_wq_completion);
 
 	return job;
+
+free_job:
+	kfree(job);
+	return NULL;
 }
 
 static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index eaae69a9f817..9c876d1480d2 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -19,6 +19,7 @@
 #include <linux/dma-direction.h>
 #include <linux/scatterlist.h>
 #include <linux/hashtable.h>
+#include <linux/rhashtable.h>
 #include <linux/debugfs.h>
 #include <linux/rwsem.h>
 #include <linux/eventfd.h>
@@ -540,6 +541,8 @@ struct hl_hints_range {
 	u64 end_addr;
 };
 
+extern const struct rhashtable_params hl_userptr_rht_params;
+
 /**
  * struct asic_fixed_properties - ASIC specific immutable properties.
  * @hw_queues_props: H/W queues properties.
@@ -1915,7 +1918,7 @@ struct hl_ctx_mgr {
 /**
  * struct hl_userptr - memory mapping chunk information
  * @vm_type: type of the VM.
- * @job_node: linked-list node for hanging the object on the Job's list.
+ * @job_node: hashtable node for hanging the object on the Job's hashtable.
  * @pages: pointer to struct page array
  * @npages: size of @pages array
  * @sgt: pointer to the scatter-gather table that holds the pages.
@@ -1928,7 +1931,7 @@ struct hl_ctx_mgr {
  */
 struct hl_userptr {
 	enum vm_type		vm_type; /* must be first */
-	struct list_head	job_node;
+	struct rhash_head	job_node;
 	struct page		**pages;
 	unsigned int		npages;
 	struct sg_table		*sgt;
@@ -2028,7 +2031,7 @@ struct hl_cs {
  * @patched_cb: in case of patching, this is internal CB which is submitted on
  *		the queue instead of the CB we got from the IOCTL.
  * @finish_work: workqueue object to run when job is completed.
- * @userptr_list: linked-list of userptr mappings that belong to this job and
+ * @userptr_ht: hashtable of userptr mappings that belong to this job and
  *			wait for completion.
  * @debugfs_list: node in debugfs list of command submission jobs.
  * @refcount: reference counter for usage of the CS job.
@@ -2056,7 +2059,7 @@ struct hl_cs_job {
 	struct hl_cb		*user_cb;
 	struct hl_cb		*patched_cb;
 	struct work_struct	finish_work;
-	struct list_head	userptr_list;
+	struct rhashtable	userptr_ht;
 	struct list_head	debugfs_list;
 	struct kref		refcount;
 	enum hl_queue_type	queue_type;
@@ -2075,7 +2078,7 @@ struct hl_cs_job {
  * @user_cb: the CB we got from the user.
  * @patched_cb: in case of patching, this is internal CB which is submitted on
  *		the queue instead of the CB we got from the IOCTL.
- * @job_userptr_list: linked-list of userptr mappings that belong to the related
+ * @job_userptr_ht: hashtable of userptr mappings that belong to the related
  *			job and wait for completion.
  * @cs_sequence: the sequence number of the related CS.
  * @queue_type: the type of the H/W queue this job is submitted to.
@@ -2098,7 +2101,7 @@ struct hl_cs_job {
 struct hl_cs_parser {
 	struct hl_cb		*user_cb;
 	struct hl_cb		*patched_cb;
-	struct list_head	*job_userptr_list;
+	struct rhashtable	*job_userptr_ht;
 	u64			cs_sequence;
 	enum hl_queue_type	queue_type;
 	u32			ctx_id;
@@ -3760,9 +3763,9 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 			struct hl_userptr *userptr);
 void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
 void hl_userptr_delete_list(struct hl_device *hdev,
-				struct list_head *userptr_list);
+				struct rhashtable *userptr_ht);
 bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
-				struct list_head *userptr_list,
+				struct rhashtable *userptr_ht,
 				struct hl_userptr **userptr);
 
 int hl_mmu_init(struct hl_device *hdev);
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index a7b6a273ce21..fa2104e33639 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -23,6 +23,13 @@ MODULE_IMPORT_NS(DMA_BUF);
 
 #define MEM_HANDLE_INVALID	ULONG_MAX
 
+const struct rhashtable_params hl_userptr_rht_params = {
+	.head_offset = offsetof(struct hl_userptr, job_node),
+	.key_offset = offsetof(struct hl_userptr, addr),
+	.key_len = sizeof(u64),
+	.automatic_shrinking = true,
+};
+
 static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
 			struct hl_mem_in *args, u64 *handle);
 
@@ -2483,7 +2490,6 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 	userptr->size = size;
 	userptr->addr = addr;
 	userptr->dma_mapped = false;
-	INIT_LIST_HEAD(&userptr->job_node);
 
 	rc = get_user_memory(hdev, addr, size, npages, start, offset,
 				userptr);
@@ -2522,32 +2528,32 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
 	unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
 	kvfree(userptr->pages);
 
-	list_del(&userptr->job_node);
-
 	sg_free_table(userptr->sgt);
 	kfree(userptr->sgt);
 }
 
+static void hl_userptr_free_cb(void *ptr, void *arg)
+{
+	struct hl_userptr *userptr = ptr;
+	struct hl_device *hdev = (struct hl_device *)arg;
+
+	hl_unpin_host_memory(hdev, userptr);
+	kfree(userptr);
+}
+
 /**
  * hl_userptr_delete_list() - clear userptr list.
  * @hdev: pointer to the habanalabs device structure.
- * @userptr_list: pointer to the list to clear.
+ * @userptr_ht: pointer to the hashtable to clear.
  *
  * This function does the following:
  * - Iterates over the list and unpins the host memory and frees the userptr
  *   structure.
  */
 void hl_userptr_delete_list(struct hl_device *hdev,
-				struct list_head *userptr_list)
+				struct rhashtable *userptr_ht)
 {
-	struct hl_userptr *userptr, *tmp;
-
-	list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
-		hl_unpin_host_memory(hdev, userptr);
-		kfree(userptr);
-	}
-
-	INIT_LIST_HEAD(userptr_list);
+	rhashtable_free_and_destroy(userptr_ht, hl_userptr_free_cb, hdev);
 }
 
 /**
@@ -2555,7 +2561,7 @@ void hl_userptr_delete_list(struct hl_device *hdev,
  * @hdev: pointer to the habanalabs device structure.
  * @addr: user address to check.
  * @size: user block size to check.
- * @userptr_list: pointer to the list to clear.
+ * @userptr_ht: pointer to the hashtable to search.
  * @userptr: pointer to userptr to check.
  *
  * This function does the following:
@@ -2563,10 +2569,11 @@ void hl_userptr_delete_list(struct hl_device *hdev,
  *   pinned. If so, returns true, otherwise returns false.
  */
 bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
-				u32 size, struct list_head *userptr_list,
+				u32 size, struct rhashtable *userptr_ht,
 				struct hl_userptr **userptr)
 {
-	list_for_each_entry((*userptr), userptr_list, job_node) {
+	(*userptr) = rhashtable_lookup_fast(userptr_ht, &addr, hl_userptr_rht_params);
+	if (*userptr) {
 		if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
 			return true;
 	}
diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
index a29aa8f7b6f3..1e1433042413 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi.c
@@ -1031,7 +1031,7 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,
 	}
 
 free_job:
-	hl_userptr_delete_list(hdev, &job->userptr_list);
+	hl_userptr_delete_list(hdev, &job->userptr_ht);
 	hl_debugfs_remove_job(hdev, job);
 	kfree(job);
 	atomic_dec(&cb->cs_cnt);
@@ -4901,7 +4901,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
 	int rc;
 
 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
-			parser->job_userptr_list, &userptr))
+			parser->job_userptr_ht, &userptr))
 		goto already_pinned;
 
 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
@@ -4913,7 +4913,10 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
 	if (rc)
 		goto free_userptr;
 
-	list_add_tail(&userptr->job_node, parser->job_userptr_list);
+	rc = rhashtable_insert_fast(parser->job_userptr_ht,
+				    &userptr->job_node, hl_userptr_rht_params);
+	if (rc)
+		goto unpin_memory;
 
 	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
 	if (rc) {
@@ -4931,7 +4934,8 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
 	return 0;
 
 unpin_memory:
-	list_del(&userptr->job_node);
+	rhashtable_remove_fast(parser->job_userptr_ht,
+			       &userptr->job_node, hl_userptr_rht_params);
 	hl_unpin_host_memory(hdev, userptr);
 free_userptr:
 	kfree(userptr);
@@ -5175,7 +5179,7 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev,
 	if ((!skip_host_mem_pin) &&
 		(!hl_userptr_is_pinned(hdev, addr,
 					le32_to_cpu(user_dma_pkt->tsize),
-					parser->job_userptr_list, &userptr))) {
+					parser->job_userptr_ht, &userptr))) {
 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
 				addr, user_dma_pkt->tsize);
 		return -EFAULT;
@@ -5472,7 +5476,7 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
 
 free_userptr:
 	if (rc)
-		hl_userptr_delete_list(hdev, parser->job_userptr_list);
+		hl_userptr_delete_list(hdev, parser->job_userptr_ht);
 	return rc;
 }
 
diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c
index fb0ac9df841a..bfcbb9e8b126 100644
--- a/drivers/accel/habanalabs/goya/goya.c
+++ b/drivers/accel/habanalabs/goya/goya.c
@@ -3347,7 +3347,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
 	int rc;
 
 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
-			parser->job_userptr_list, &userptr))
+			parser->job_userptr_ht, &userptr))
 		goto already_pinned;
 
 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
@@ -3359,7 +3359,10 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
 	if (rc)
 		goto free_userptr;
 
-	list_add_tail(&userptr->job_node, parser->job_userptr_list);
+	rc = rhashtable_insert_fast(parser->job_userptr_ht,
+				    &userptr->job_node, hl_userptr_rht_params);
+	if (rc)
+		goto unpin_memory;
 
 	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
 	if (rc) {
@@ -3377,7 +3380,8 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
 	return 0;
 
 unpin_memory:
-	list_del(&userptr->job_node);
+	rhashtable_remove_fast(parser->job_userptr_ht,
+			       &userptr->job_node, hl_userptr_rht_params);
 	hl_unpin_host_memory(hdev, userptr);
 free_userptr:
 	kfree(userptr);
@@ -3806,7 +3810,7 @@ static int goya_patch_dma_packet(struct hl_device *hdev,
 	if ((!skip_host_mem_pin) &&
 		(hl_userptr_is_pinned(hdev, addr,
 			le32_to_cpu(user_dma_pkt->tsize),
-			parser->job_userptr_list, &userptr) == false)) {
+			parser->job_userptr_ht, &userptr) == false)) {
 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
 				addr, user_dma_pkt->tsize);
 		return -EFAULT;
@@ -4104,7 +4108,7 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
 
 free_userptr:
 	if (rc)
-		hl_userptr_delete_list(hdev, parser->job_userptr_list);
+		hl_userptr_delete_list(hdev, parser->job_userptr_ht);
 	return rc;
 }
 
-- 
2.34.1


