[PATCH 08/10] accel/habanalabs: add unregister timestamp uapi

Oded Gabbay ogabbay at kernel.org
Tue Apr 18 09:39:14 UTC 2023


From: farah kassabri <fkassabri at habana.ai>

Add uapi to allow user to unregister timestamp record.
This is needed when the user wishes to re-use the same record with
different interrupt id. For that, the user must first unregister it
from the current interrupt id and then register it with the new id.

Signed-off-by: farah kassabri <fkassabri at habana.ai>
Reviewed-by: Oded Gabbay <ogabbay at kernel.org>
Signed-off-by: Oded Gabbay <ogabbay at kernel.org>
---
 .../habanalabs/common/command_submission.c    | 123 ++++++++++++++----
 include/uapi/drm/habanalabs_accel.h           |   1 +
 2 files changed, 101 insertions(+), 23 deletions(-)

diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index af9d2e22c6e7..977900866e8f 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -3196,34 +3196,57 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	return 0;
 }
 
+static inline void set_record_cq_info(struct hl_user_pending_interrupt *record,
+					struct hl_cb *cq_cb, u32 cq_offset, u32 target_value)
+{
+	record->ts_reg_info.cq_cb = cq_cb;
+	record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset;
+	record->cq_target_value = target_value;
+}
+
+static int validate_and_get_ts_record(struct device *dev,
+					struct hl_ts_buff *ts_buff, u64 ts_offset,
+					struct hl_user_pending_interrupt **req_event_record)
+{
+	struct hl_user_pending_interrupt *ts_cb_last;
+
+	*req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+						ts_offset;
+	ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+			(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
+
+	/* Validate ts_offset not exceeding last max */
+	if (*req_event_record >= ts_cb_last) {
+		dev_err(dev, "Ts offset(%llx) exceeds max CB offset(0x%llx)\n",
+				ts_offset, (u64)(uintptr_t)ts_cb_last);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
 					struct hl_cb *cq_cb,
 					u64 ts_offset, u64 cq_offset, u64 target_value,
 					spinlock_t *wait_list_lock,
 					struct hl_user_pending_interrupt **pend)
 {
+	struct hl_user_pending_interrupt *requested_offset_record;
 	struct hl_ts_buff *ts_buff = buf->private;
-	struct hl_user_pending_interrupt *requested_offset_record =
-				(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
-				ts_offset;
-	struct hl_user_pending_interrupt *cb_last =
-			(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
-			(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
-	unsigned long iter_counter = 0;
+	unsigned long iter_counter = 0, flags;
 	u64 current_cq_counter;
 	ktime_t timestamp;
+	int rc;
 
-	/* Validate ts_offset not exceeding last max */
-	if (requested_offset_record >= cb_last) {
-		dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
-								(u64)(uintptr_t)cb_last);
-		return -EINVAL;
-	}
+	rc = validate_and_get_ts_record(buf->mmg->dev, ts_buff, ts_offset,
+							&requested_offset_record);
+	if (rc)
+		return rc;
 
 	timestamp = ktime_get();
 
 start_over:
-	spin_lock(wait_list_lock);
+	spin_lock_irqsave(wait_list_lock, flags);
 
 	/* Unregister only if we didn't reach the target value
 	 * since in this case there will be no handling in irq context
@@ -3234,7 +3257,9 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
 		current_cq_counter = *requested_offset_record->cq_kernel_addr;
 		if (current_cq_counter < requested_offset_record->cq_target_value) {
 			list_del(&requested_offset_record->wait_list_node);
-			spin_unlock(wait_list_lock);
+			spin_unlock_irqrestore(wait_list_lock, flags);
+
+			set_record_cq_info(requested_offset_record, cq_cb, cq_offset, target_value);
 
 			hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf);
 			hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
@@ -3245,8 +3270,8 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
 			dev_dbg(buf->mmg->dev,
 				"ts node in middle of irq handling\n");
 
-			/* irq thread handling in the middle give it time to finish */
-			spin_unlock(wait_list_lock);
+			/* irq handling in the middle give it time to finish */
+			spin_unlock_irqrestore(wait_list_lock, flags);
 			usleep_range(100, 1000);
 			if (++iter_counter == MAX_TS_ITER_NUM) {
 				dev_err(buf->mmg->dev,
@@ -3260,14 +3285,11 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
 	} else {
 		/* Fill up the new registration node info */
 		requested_offset_record->ts_reg_info.buf = buf;
-		requested_offset_record->ts_reg_info.cq_cb = cq_cb;
 		requested_offset_record->ts_reg_info.timestamp_kernel_addr =
 				(u64 *) ts_buff->user_buff_address + ts_offset;
-		requested_offset_record->cq_kernel_addr =
-				(u64 *) cq_cb->kernel_address + cq_offset;
-		requested_offset_record->cq_target_value = target_value;
+		set_record_cq_info(requested_offset_record, cq_cb, cq_offset, target_value);
 
-		spin_unlock(wait_list_lock);
+		spin_unlock_irqrestore(wait_list_lock, flags);
 	}
 
 	*pend = requested_offset_record;
@@ -3277,6 +3299,58 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
 	return 0;
 }
 
+static int unregister_timestamp_node_ioctl(struct hl_device *hdev, struct hl_mem_mgr *mmg,
+		u64 ts_handle, u64 ts_offset, struct hl_user_interrupt *interrupt)
+{
+	struct hl_user_pending_interrupt *req_event_record, *pend, *temp_pend;
+	struct hl_mmap_mem_buf *buff;
+	struct hl_ts_buff *ts_buff;
+	bool ts_rec_found = false;
+	int rc;
+
+	buff = hl_mmap_mem_buf_get(mmg, ts_handle);
+	if (!buff) {
+		dev_err(hdev->dev, "invalid TS buff handle!\n");
+		return -EINVAL;
+	}
+
+	ts_buff = buff->private;
+
+	rc = validate_and_get_ts_record(hdev->dev, ts_buff, ts_offset, &req_event_record);
+	if (rc)
+		goto out;
+
+	/*
+	 * Note: we don't use the ts in_use field here, but we rather scan the list
+	 * because we cannot rely on the user to keep the order of register/unregister calls
+	 * and since we might have races here all the time between the irq and register/unregister
+	 * calls so it safer to lock the list and scan it to find the node.
+	 * If the node found on the list we mark it as not in use and delete it from the list,
+	 * if it's not here then the node was handled already in the irq before we get into
+	 * this ioctl.
+	 */
+	spin_lock(&interrupt->wait_list_lock);
+	list_for_each_entry_safe(pend, temp_pend, &interrupt->wait_list_head, wait_list_node) {
+		if (pend == req_event_record) {
+			pend->ts_reg_info.in_use = 0;
+			list_del(&pend->wait_list_node);
+			ts_rec_found = true;
+			break;
+		}
+	}
+	spin_unlock(&interrupt->wait_list_lock);
+
+	/* Put refcounts that were taken when we registered the event */
+	if (ts_rec_found) {
+		hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
+		hl_cb_put(pend->ts_reg_info.cq_cb);
+	}
+out:
+	hl_mmap_mem_buf_put(buff);
+
+	return rc;
+}
+
 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 				struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg,
 				u64 timeout_us, u64 cq_counters_handle,	u64 cq_counters_offset,
@@ -3610,7 +3684,10 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 		return -EINVAL;
 	}
 
-	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
+	if (args->in.flags & HL_WAIT_CS_FLAGS_UNREGISTER_INTERRUPT)
+		rc = unregister_timestamp_node_ioctl(hdev, &hpriv->mem_mgr,
+				args->in.timestamp_handle, args->in.timestamp_offset, interrupt);
+	else if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
 		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr,
 				args->in.interrupt_timeout_us, args->in.cq_counters_handle,
 				args->in.cq_counters_offset,
diff --git a/include/uapi/drm/habanalabs_accel.h b/include/uapi/drm/habanalabs_accel.h
index d9ef1b151d04..d45454b9a62e 100644
--- a/include/uapi/drm/habanalabs_accel.h
+++ b/include/uapi/drm/habanalabs_accel.h
@@ -1704,6 +1704,7 @@ union hl_cs_args {
 #define HL_WAIT_CS_FLAGS_MULTI_CS		0x4
 #define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ	0x10
 #define HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT	0x20
+#define HL_WAIT_CS_FLAGS_UNREGISTER_INTERRUPT	0x40
 
 #define HL_WAIT_MULTI_CS_LIST_MAX_LEN	32
 
-- 
2.40.0



More information about the dri-devel mailing list