[PATCH 2/2] drm/amdkfd: CRIU add support for GWS queues

David Yat Sin david.yatsin at amd.com
Wed Apr 13 15:51:45 UTC 2022


Adding support to checkpoint/restore GWS(Global Wave Sync) queues.

Signed-off-by: David Yat Sin <david.yatsin at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  4 ++--
 .../amd/amdkfd/kfd_process_queue_manager.c    | 22 ++++++++++++++-----
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e1e2362841f8..b2c72ddd9c1c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1377,7 +1377,7 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
 		goto out_unlock;
 	}
 
-	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
+	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL, false);
 	mutex_unlock(&p->mutex);
 
 	args->first_gws = 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f36062be9ca8..3ec32675210c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1102,7 +1102,7 @@ struct kfd_criu_queue_priv_data {
 	uint32_t priority;
 	uint32_t q_percent;
 	uint32_t doorbell_id;
-	uint32_t is_gws;
+	uint32_t gws;
 	uint32_t sdma_id;
 	uint32_t eop_ring_buffer_size;
 	uint32_t ctx_save_restore_area_size;
@@ -1199,7 +1199,7 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm, unsigned int
 int pqm_update_mqd(struct process_queue_manager *pqm, unsigned int qid,
 			struct mqd_update_info *minfo);
 int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
-			void *gws);
+			void *gws, bool criu_restore);
 struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
 						unsigned int qid);
 struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 6eca9509f2e3..3eb9d43fdaac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -91,7 +91,7 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
 }
 
 int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
-			void *gws)
+		void *gws, bool criu_restore)
 {
 	struct kfd_dev *dev = NULL;
 	struct process_queue_node *pqn;
@@ -135,8 +135,14 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
 	pqn->q->gws = mem;
 	pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
 
-	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
-							pqn->q, NULL);
+	ret = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
+						    pqn->q, NULL);
+
+	if (!ret && criu_restore) {
+		dev->dqm->gws_queue_count++;
+		pdd->qpd.mapped_gws_queue = true;
+	}
+	return ret;
 }
 
 void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
@@ -636,6 +642,8 @@ static int criu_checkpoint_queue(struct kfd_process_device *pdd,
 	q_data->ctx_save_restore_area_size =
 		q->properties.ctx_save_restore_area_size;
 
+	q_data->gws = !!q->gws;
+
 	ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
 	if (ret) {
 		pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
@@ -743,7 +751,6 @@ static void set_queue_properties_from_criu(struct queue_properties *qp,
 					  struct kfd_criu_queue_priv_data *q_data)
 {
 	qp->is_interop = false;
-	qp->is_gws = q_data->is_gws;
 	qp->queue_percent = q_data->q_percent;
 	qp->priority = q_data->priority;
 	qp->queue_address = q_data->q_address;
@@ -826,12 +833,15 @@ int kfd_criu_restore_queue(struct kfd_process *p,
 				NULL);
 	if (ret) {
 		pr_err("Failed to create new queue err:%d\n", ret);
-		ret = -EINVAL;
+		goto exit;
 	}
 
+	if (q_data->gws)
+		ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws, true);
+
 exit:
 	if (ret)
-		pr_err("Failed to create queue (%d)\n", ret);
+		pr_err("Failed to restore queue (%d)\n", ret);
 	else
 		pr_debug("Queue id %d was restored successfully\n", queue_id);
 
-- 
2.30.2



More information about the amd-gfx mailing list