[Patch v4 10/24] drm/amdkfd: CRIU restore queue ids
Rajneesh Bhardwaj
rajneesh.bhardwaj at amd.com
Thu Dec 23 00:36:57 UTC 2021
From: David Yat Sin <david.yatsin at amd.com>
When re-creating queues during CRIU restore, restore the queue with the
same queue id value used during CRIU dump.
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
Signed-off-by: David Yat Sin <david.yatsin at amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +
.../amd/amdkfd/kfd_process_queue_manager.c | 37 +++++++++++++++----
4 files changed, 34 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 9665c8657929..3fb155f756fd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -312,7 +312,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
p->pasid,
dev->id);
- err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
+ err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL,
&doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 1e30717b5253..0c50e67e2b51 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
properties.type = KFD_QUEUE_TYPE_DIQ;
status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
- &properties, &qid, NULL);
+ &properties, &qid, NULL, NULL);
if (status) {
pr_err("Failed to create DIQ\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7c2679a23aa3..8272bd5c4600 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -461,6 +461,7 @@ enum KFD_QUEUE_PRIORITY {
* it's user mode or kernel mode queue.
*
*/
+
struct queue_properties {
enum kfd_queue_type type;
enum kfd_queue_format format;
@@ -1156,6 +1157,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
+ const struct kfd_criu_queue_priv_data *q_data,
uint32_t *p_doorbell_offset_in_process);
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue_properties(struct process_queue_manager *pqm, unsigned int qid,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 480ad794df4e..275aeebc58fa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -42,6 +42,20 @@ static inline struct process_queue_node *get_queue_by_qid(
return NULL;
}
+static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
+ unsigned int qid)
+{
+ if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
+ return -EINVAL;
+
+ if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
+ pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
static int find_available_queue_slot(struct process_queue_manager *pqm,
unsigned int *qid)
{
@@ -194,6 +208,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
+ const struct kfd_criu_queue_priv_data *q_data,
uint32_t *p_doorbell_offset_in_process)
{
int retval;
@@ -225,7 +240,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (pdd->qpd.queue_count >= max_queues)
return -ENOSPC;
- retval = find_available_queue_slot(pqm, qid);
+ if (q_data) {
+ retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
+ *qid = q_data->q_id;
+ } else
+ retval = find_available_queue_slot(pqm, qid);
+
if (retval != 0)
return retval;
@@ -528,7 +548,7 @@ int kfd_process_get_queue_info(struct kfd_process *p,
return 0;
}
-static void criu_dump_queue(struct kfd_process_device *pdd,
+static void criu_checkpoint_queue(struct kfd_process_device *pdd,
struct queue *q,
struct kfd_criu_queue_priv_data *q_data)
{
@@ -560,7 +580,7 @@ static void criu_dump_queue(struct kfd_process_device *pdd,
pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
}
-static int criu_dump_queues_device(struct kfd_process_device *pdd,
+static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
uint8_t __user *user_priv,
unsigned int *q_index,
uint64_t *queues_priv_data_offset)
@@ -582,7 +602,8 @@ static int criu_dump_queues_device(struct kfd_process_device *pdd,
return -EOPNOTSUPP;
}
- criu_dump_queue(pdd, q, q_data);
+ criu_checkpoint_queue(pdd, q, q_data);
+ q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;
ret = copy_to_user(user_priv + *queues_priv_data_offset, q_data, sizeof(*q_data));
if (ret) {
@@ -608,10 +629,12 @@ int kfd_criu_checkpoint_queues(struct kfd_process *p,
struct kfd_process_device *pdd = p->pdds[pdd_index];
/*
- * criu_dump_queues_device will copy data to user and update q_index and
+ * criu_checkpoint_queues_device will copy data to user and update q_index and
* queues_priv_data_offset
*/
- ret = criu_dump_queues_device(pdd, user_priv_data, &q_index, priv_data_offset);
+ ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
+ priv_data_offset);
+
if (ret)
break;
}
@@ -693,7 +716,7 @@ int kfd_criu_restore_queue(struct kfd_process *p,
print_queue_properties(&qp);
- ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL);
+ ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, NULL);
if (ret) {
pr_err("Failed to create new queue err:%d\n", ret);
ret = -EINVAL;
--
2.17.1
More information about the amd-gfx
mailing list