[RFC PATCH 09/17] drm/amdkfd: CRIU restore queue ids
Felix Kuehling
Felix.Kuehling at amd.com
Sat May 1 01:57:44 UTC 2021
From: David Yat Sin <david.yatsin at amd.com>
When re-creating queues during CRIU restore, restore each queue with the
same queue id value that was used during CRIU dump. Add a new private
structure, queue_restore_data, to store queue restore information.
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
Signed-off-by: David Yat Sin <david.yatsin at amd.com>
Change-Id: I6959da5d3aeebe5be6623483883ef79676591134
---
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 18 ++++++++++-----
drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 +++++
.../amd/amdkfd/kfd_process_queue_manager.c | 22 ++++++++++++++++++-
4 files changed, 41 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a9a04148e94c..a21d32ff0730 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -313,7 +313,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
dev->id);
err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
- &doorbell_offset_in_process);
+ NULL, &doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
@@ -1905,7 +1905,7 @@ static void criu_dump_queue(struct kfd_process_device *pdd,
q_bucket->read_ptr_addr = (uint64_t)q->properties.read_ptr;
q_bucket->write_ptr_addr = (uint64_t)q->properties.write_ptr;
q_bucket->doorbell_id = q->doorbell_id;
- q_bucket->doorbell_off = q->properties.doorbell_off;
+
q_bucket->sdma_id = q->sdma_id;
q_bucket->eop_ring_buffer_address =
@@ -2122,7 +2122,8 @@ static void set_queue_properties_from_criu(struct queue_properties *qp,
int criu_restore_queue(struct kfd_process *p,
struct kfd_dev *dev,
struct kfd_process_device *pdd,
- struct kfd_criu_q_bucket *q_bucket)
+ struct kfd_criu_q_bucket *q_bucket,
+ struct queue_restore_data *qrd)
{
int ret = 0;
unsigned int queue_id;
@@ -2150,11 +2151,14 @@ int criu_restore_queue(struct kfd_process *p,
set_queue_properties_from_criu(&qp, q_bucket);
print_queue_properties(&qp);
- ret = pqm_create_queue(&p->pqm, dev, NULL, &qp, &queue_id, NULL);
+ qrd->qid = q_bucket->q_id;
+
+ ret = pqm_create_queue(&p->pqm, dev, NULL, &qp, &queue_id, qrd, NULL);
if (ret) {
pr_err("Failed to create new queue err:%d\n", ret);
return -EINVAL;
}
+
pr_debug("Queue id %d was restored successfully\n", queue_id);
return 0;
@@ -2178,6 +2182,10 @@ static int criu_restore_queues(struct kfd_process *p,
for (i = 0; i < args->num_of_queues; i++) {
struct kfd_criu_q_bucket q_bucket;
+ struct queue_restore_data qrd;
+
+ memset(&qrd, 0, sizeof(qrd));
+
ret = copy_from_user(&q_bucket, (void __user *)&user_buckets[i],
sizeof(struct kfd_criu_q_bucket));
@@ -2202,7 +2210,7 @@ static int criu_restore_queues(struct kfd_process *p,
ret = -EFAULT;
return ret;
}
- ret = criu_restore_queue(p, dev, pdd, &q_bucket);
+ ret = criu_restore_queue(p, dev, pdd, &q_bucket, &qrd);
if (ret) {
pr_err("Failed to restore queue (%d)\n", ret);
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 159add0f5aaa..749a7a3bf191 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
properties.type = KFD_QUEUE_TYPE_DIQ;
status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
- &properties, &qid, NULL);
+ &properties, &qid, NULL, NULL);
if (status) {
pr_err("Failed to create DIQ\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 8278c43f4e50..d21b7eb08a76 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -468,6 +468,11 @@ enum KFD_QUEUE_PRIORITY {
* it's user mode or kernel mode queue.
*
*/
+
+struct queue_restore_data {
+ uint32_t qid;
+};
+
struct queue_properties {
enum kfd_queue_type type;
enum kfd_queue_format format;
@@ -1055,6 +1060,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
+ const struct queue_restore_data *qrd,
uint32_t *p_doorbell_offset_in_process);
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 95a6c36cea4c..cb136e13baff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -42,6 +42,20 @@ static inline struct process_queue_node *get_queue_by_qid(
return NULL;
}
+static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
+ unsigned int qid)
+{
+ if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
+ return -EINVAL;
+
+ if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
+ pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
static int find_available_queue_slot(struct process_queue_manager *pqm,
unsigned int *qid)
{
@@ -193,6 +207,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
+ const struct queue_restore_data *qrd,
uint32_t *p_doorbell_offset_in_process)
{
int retval;
@@ -224,7 +239,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (pdd->qpd.queue_count >= max_queues)
return -ENOSPC;
- retval = find_available_queue_slot(pqm, qid);
+ if (qrd) {
+ retval = assign_queue_slot_by_qid(pqm, qrd->qid);
+ *qid = qrd->qid;
+ } else
+ retval = find_available_queue_slot(pqm, qid);
+
if (retval != 0)
return retval;
--
2.17.1
More information about the amd-gfx
mailing list