[PATCH 4/6] amdkfd: Add SDMA user-mode queues support to QCM
Oded Gabbay
oded.gabbay at amd.com
Sun Dec 14 06:55:10 PST 2014
From: Ben Goz <ben.goz at amd.com>
This patch adds support for SDMA user-mode queues to the QCM - the Queue
management system that manages queues-per-device and queues-per-process.
Signed-off-by: Ben Goz <ben.goz at amd.com>
---
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 167 +++++++++++++++++++--
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 5 +
.../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +-
3 files changed, 164 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 7b6df51..55ee2da 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -46,9 +46,24 @@ static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
+
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
+static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
+ struct queue *q,
+ struct qcm_process_device *qpd);
+
+static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+ unsigned int sdma_queue_id);
+
+static inline
+enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
+{
+ if (type == KFD_QUEUE_TYPE_SDMA)
+ return KFD_MQD_TYPE_CIK_SDMA;
+ return KFD_MQD_TYPE_CIK_CP;
+}
static inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
{
@@ -189,7 +204,10 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
*allocated_vmid = qpd->vmid;
q->properties.vmid = qpd->vmid;
- retval = create_compute_queue_nocpsch(dqm, q, qpd);
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
+ retval = create_compute_queue_nocpsch(dqm, q, qpd);
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ retval = create_sdma_queue_nocpsch(dqm, q, qpd);
if (retval != 0) {
if (list_empty(&qpd->queues_list)) {
@@ -202,7 +220,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
list_add(&q->list, &qpd->queues_list);
dqm->queue_count++;
-
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ dqm->sdma_queue_count++;
mutex_unlock(&dqm->lock);
return 0;
}
@@ -279,8 +298,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q)
{
int retval;
- struct mqd_manager *mqd;
-
+ struct mqd_manager *mqd, *mqd_sdma;
BUG_ON(!dqm || !q || !q->mqd || !qpd);
retval = 0;
@@ -294,6 +312,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
goto out;
}
+ mqd_sdma = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_SDMA);
+ if (mqd_sdma == NULL) {
+ mutex_unlock(&dqm->lock);
+ return -ENOMEM;
+ }
+
retval = mqd->destroy_mqd(mqd, q->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT,
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
@@ -302,7 +326,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
if (retval != 0)
goto out;
- deallocate_hqd(dqm, q);
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
+ deallocate_hqd(dqm, q);
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+ dqm->sdma_queue_count--;
+ deallocate_sdma_queue(dqm, q->sdma_id);
+ }
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
@@ -324,7 +353,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
BUG_ON(!dqm || !q || !q->mqd);
mutex_lock(&dqm->lock);
- mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
+ mqd = dqm->get_mqd_manager(dqm, q->properties.type);
if (mqd == NULL) {
mutex_unlock(&dqm->lock);
return -ENOMEM;
@@ -536,6 +565,11 @@ static int init_pipelines(struct device_queue_manager *dqm,
}
+static int init_sdma_engines(struct device_queue_manager *dqm)
+{
+ return kfd2kgd->init_sdma_engines(dqm->dev->kgd);
+}
+
static int init_scheduler(struct device_queue_manager *dqm)
{
int retval;
@@ -549,7 +583,10 @@ static int init_scheduler(struct device_queue_manager *dqm)
return retval;
retval = init_memory(dqm);
+ if (retval != 0)
+ return retval;
+ retval = init_sdma_engines(dqm);
return retval;
}
@@ -565,6 +602,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
mutex_init(&dqm->lock);
INIT_LIST_HEAD(&dqm->queues);
dqm->queue_count = dqm->next_pipe_to_allocate = 0;
+ dqm->sdma_queue_count = 0;
dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
sizeof(unsigned int), GFP_KERNEL);
if (!dqm->allocated_queues) {
@@ -576,6 +614,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;
dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
+ dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
init_scheduler(dqm);
return 0;
@@ -607,6 +646,77 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
return 0;
}
+static int allocate_sdma_queue(struct device_queue_manager *dqm,
+ unsigned int *sdma_queue_id)
+{
+ int bit;
+
+ if (dqm->sdma_bitmap == 0)
+ return -ENOMEM;
+
+ bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
+ CIK_SDMA_QUEUES);
+
+ clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
+ *sdma_queue_id = bit;
+
+ return 0;
+}
+
+static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+ unsigned int sdma_queue_id)
+{
+ if (sdma_queue_id < 0 || sdma_queue_id >= CIK_SDMA_QUEUES)
+ return;
+ set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
+}
+
+static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ uint32_t value = ATC;
+
+ if (q->process->is_32bit_user_mode)
+ value |= VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd));
+ else
+ value |= VA_SHARED_BASE(get_sh_mem_bases_nybble_64(
+ qpd_to_pdd(qpd)));
+ q->properties.sdma_vm_addr = value;
+}
+
+static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
+ struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ struct mqd_manager *mqd;
+ int retval;
+
+ mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_SDMA);
+ if (!mqd)
+ return -ENOMEM;
+
+ retval = allocate_sdma_queue(dqm, &q->sdma_id);
+ if (retval != 0)
+ return retval;
+
+ q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
+ q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;
+
+ pr_debug("kfd: sdma id is: %d\n", q->sdma_id);
+ pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id);
+ pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id);
+
+ retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
+ &q->gart_mqd_addr, &q->properties);
+ if (retval != 0) {
+ deallocate_sdma_queue(dqm, q->sdma_id);
+ return retval;
+ }
+
+ init_sdma_vm(dqm, q, qpd);
+ return 0;
+}
+
/*
* Device Queue Manager implementation for cp scheduler
*/
@@ -648,6 +758,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
mutex_init(&dqm->lock);
INIT_LIST_HEAD(&dqm->queues);
dqm->queue_count = dqm->processes_count = 0;
+ dqm->sdma_queue_count = 0;
dqm->active_runlist = false;
retval = init_pipelines(dqm, get_pipes_num(dqm), 0);
if (retval != 0)
@@ -692,6 +803,8 @@ static int start_cpsch(struct device_queue_manager *dqm)
dqm->fence_addr = dqm->fence_mem->cpu_ptr;
dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
+ init_sdma_engines(dqm);
+
list_for_each_entry(node, &dqm->queues, list)
if (node->qpd->pqm->process && dqm->dev)
kfd_bind_process_to_device(dqm->dev,
@@ -762,6 +875,14 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
mutex_unlock(&dqm->lock);
}
+static void select_sdma_engine_id(struct queue *q)
+{
+ static int sdma_id;
+
+ q->sdma_id = sdma_id;
+ sdma_id = (sdma_id + 1) % 2;
+}
+
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd, int *allocate_vmid)
{
@@ -777,7 +898,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
mutex_lock(&dqm->lock);
- mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ select_sdma_engine_id(q);
+
+ mqd = dqm->get_mqd_manager(dqm,
+ get_mqd_type_from_queue_type(q->properties.type));
+
if (mqd == NULL) {
mutex_unlock(&dqm->lock);
return -ENOMEM;
@@ -794,6 +920,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
retval = execute_queues_cpsch(dqm, false);
}
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ dqm->sdma_queue_count++;
+
out:
mutex_unlock(&dqm->lock);
return retval;
@@ -817,6 +946,14 @@ static int fence_wait_timeout(unsigned int *fence_addr,
return 0;
}
+static int destroy_sdma_queues(struct device_queue_manager *dqm,
+ unsigned int sdma_engine)
+{
+ return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
+ KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
+ sdma_engine);
+}
+
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
int retval;
@@ -829,6 +966,15 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
mutex_lock(&dqm->lock);
if (dqm->active_runlist == false)
goto out;
+
+ pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n",
+ dqm->sdma_queue_count);
+
+ if (dqm->sdma_queue_count > 0) {
+ destroy_sdma_queues(dqm, 0);
+ destroy_sdma_queues(dqm, 1);
+ }
+
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
if (retval != 0)
@@ -900,13 +1046,16 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
/* remove queue from list to prevent rescheduling after preemption */
mutex_lock(&dqm->lock);
-
- mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
+ mqd = dqm->get_mqd_manager(dqm,
+ get_mqd_type_from_queue_type(q->properties.type));
if (!mqd) {
retval = -ENOMEM;
goto failed;
}
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ dqm->sdma_queue_count--;
+
list_del(&q->list);
dqm->queue_count--;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index c3f189e8..554c06e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -36,6 +36,9 @@
#define KFD_VMID_START_OFFSET (8)
#define VMID_PER_DEVICE CIK_VMID_NUM
#define KFD_DQM_FIRST_PIPE (0)
+#define CIK_SDMA_QUEUES (4)
+#define CIK_SDMA_QUEUES_PER_ENGINE (2)
+#define CIK_SDMA_ENGINE_NUM (2)
struct device_process_node {
struct qcm_process_device *qpd;
@@ -130,8 +133,10 @@ struct device_queue_manager {
struct list_head queues;
unsigned int processes_count;
unsigned int queue_count;
+ unsigned int sdma_queue_count;
unsigned int next_pipe_to_allocate;
unsigned int *allocated_queues;
+ unsigned int sdma_bitmap;
unsigned int vmid_bitmap;
uint64_t pipelines_addr;
struct kfd_mem_obj *pipeline_mem;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index d12f9d3..948b1ca 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -128,7 +128,6 @@ static int create_cp_queue(struct process_queue_manager *pqm,
/* let DQM handle it*/
q_properties->vmid = 0;
q_properties->queue_id = qid;
- q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
retval = init_queue(q, *q_properties);
if (retval != 0)
@@ -189,6 +188,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
switch (type) {
+ case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_COMPUTE:
/* check if there is over subscription */
if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
--
1.9.1
More information about the dri-devel
mailing list