[RFC PATCH 09/17] drm/amdkfd: CRIU restore queue ids

Felix Kuehling Felix.Kuehling at amd.com
Sat May 1 01:57:44 UTC 2021


From: David Yat Sin <david.yatsin at amd.com>

When re-creating queues during CRIU restore, restore each queue with the
same queue id that it had at CRIU dump time. Add a new private
structure, queue_restore_data, to store queue restore information.

Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
Signed-off-by: David Yat Sin <david.yatsin at amd.com>
Change-Id: I6959da5d3aeebe5be6623483883ef79676591134
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 18 ++++++++++-----
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c       |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  6 +++++
 .../amd/amdkfd/kfd_process_queue_manager.c    | 22 ++++++++++++++++++-
 4 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a9a04148e94c..a21d32ff0730 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -313,7 +313,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 			dev->id);
 
 	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
-			&doorbell_offset_in_process);
+			NULL, &doorbell_offset_in_process);
 	if (err != 0)
 		goto err_create_queue;
 
@@ -1905,7 +1905,7 @@ static void criu_dump_queue(struct kfd_process_device *pdd,
 	q_bucket->read_ptr_addr = (uint64_t)q->properties.read_ptr;
 	q_bucket->write_ptr_addr = (uint64_t)q->properties.write_ptr;
 	q_bucket->doorbell_id = q->doorbell_id;
-	q_bucket->doorbell_off = q->properties.doorbell_off;
+
 	q_bucket->sdma_id = q->sdma_id;
 
 	q_bucket->eop_ring_buffer_address =
@@ -2122,7 +2122,8 @@ static void set_queue_properties_from_criu(struct queue_properties *qp,
 int criu_restore_queue(struct kfd_process *p,
 					struct kfd_dev *dev,
 					struct kfd_process_device *pdd,
-					struct kfd_criu_q_bucket *q_bucket)
+					struct kfd_criu_q_bucket *q_bucket,
+					struct queue_restore_data *qrd)
 {
 	int ret = 0;
 	unsigned int queue_id;
@@ -2150,11 +2151,14 @@ int criu_restore_queue(struct kfd_process *p,
 	set_queue_properties_from_criu(&qp, q_bucket);
 	print_queue_properties(&qp);
 
-	ret = pqm_create_queue(&p->pqm, dev, NULL, &qp, &queue_id, NULL);
+	qrd->qid = q_bucket->q_id;
+
+	ret = pqm_create_queue(&p->pqm, dev, NULL, &qp, &queue_id, qrd, NULL);
 	if (ret) {
 		pr_err("Failed to create new queue err:%d\n", ret);
 		return -EINVAL;
 	}
+
 	pr_debug("Queue id %d was restored successfully\n", queue_id);
 
 	return 0;
@@ -2178,6 +2182,10 @@ static int criu_restore_queues(struct kfd_process *p,
 
 	for (i = 0; i < args->num_of_queues; i++) {
 		struct kfd_criu_q_bucket q_bucket;
+		struct queue_restore_data qrd;
+
+		memset(&qrd, 0, sizeof(qrd));
+
 		ret = copy_from_user(&q_bucket, (void __user *)&user_buckets[i],
 				sizeof(struct kfd_criu_q_bucket));
 
@@ -2202,7 +2210,7 @@ static int criu_restore_queues(struct kfd_process *p,
 			ret = -EFAULT;
 			return ret;
 		}
-		ret = criu_restore_queue(p, dev, pdd, &q_bucket);
+		ret = criu_restore_queue(p, dev, pdd, &q_bucket, &qrd);
 		if (ret) {
 			pr_err("Failed to restore queue (%d)\n", ret);
 			break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 159add0f5aaa..749a7a3bf191 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
 	properties.type = KFD_QUEUE_TYPE_DIQ;
 
 	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
-				&properties, &qid, NULL);
+				&properties, &qid, NULL, NULL);
 
 	if (status) {
 		pr_err("Failed to create DIQ\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 8278c43f4e50..d21b7eb08a76 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -468,6 +468,11 @@ enum KFD_QUEUE_PRIORITY {
  * it's user mode or kernel mode queue.
  *
  */
+
+struct queue_restore_data {
+	uint32_t qid;
+};
+
 struct queue_properties {
 	enum kfd_queue_type type;
 	enum kfd_queue_format format;
@@ -1055,6 +1060,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			    struct file *f,
 			    struct queue_properties *properties,
 			    unsigned int *qid,
+			    const struct queue_restore_data *qrd,
 			    uint32_t *p_doorbell_offset_in_process);
 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
 int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 95a6c36cea4c..cb136e13baff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -42,6 +42,20 @@ static inline struct process_queue_node *get_queue_by_qid(
 	return NULL;
 }
 
+static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
+				    unsigned int qid)
+{
+	if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
+		return -EINVAL;
+
+	if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
+		pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
 static int find_available_queue_slot(struct process_queue_manager *pqm,
 					unsigned int *qid)
 {
@@ -193,6 +207,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			    struct file *f,
 			    struct queue_properties *properties,
 			    unsigned int *qid,
+			    const struct queue_restore_data *qrd,
 			    uint32_t *p_doorbell_offset_in_process)
 {
 	int retval;
@@ -224,7 +239,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	if (pdd->qpd.queue_count >= max_queues)
 		return -ENOSPC;
 
-	retval = find_available_queue_slot(pqm, qid);
+	if (qrd) {
+		retval = assign_queue_slot_by_qid(pqm, qrd->qid);
+		*qid = qrd->qid;
+	} else
+		retval = find_available_queue_slot(pqm, qid);
+
 	if (retval != 0)
 		return retval;
 
-- 
2.17.1



More information about the amd-gfx mailing list