[PATCH 03/11] drm/amdkfd: Add static user-mode queues support

Oded Gabbay oded.gabbay at gmail.com
Wed May 20 14:29:00 PDT 2015


From: Yair Shachar <yair.shachar at amd.com>

This patch adds support for static user-mode queues in QCM.
Queues which are designated as static can NOT be preempted by
the CP microcode when it is executing its scheduling algorithm.

This is needed for supporting the debugger feature, because we
can't allow the CP to preempt queues which are currently being debugged.

The number of queues that can be designated as static is limited by the
number of HQDs (Hardware Queue Descriptors).

Signed-off-by: Yair Shachar <yair.shachar at amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay at gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c            |  2 +
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 38 ++++++++++++++----
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  6 +++
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c    | 46 +++++++++++++++++-----
 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h       |  6 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  2 +
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 16 ++++++++
 7 files changed, 97 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 52cab0f..f1f86db 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -33,6 +33,8 @@
 static const struct kfd_device_info kaveri_device_info = {
 	.asic_family = CHIP_KAVERI,
 	.max_pasid_bits = 16,
+	/* max num of queues for KV.TODO should be a dynamic value */
+	.max_no_of_hqd	= 24,
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.mqd_size_aligned = MQD_SIZE_ALIGNED
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 4e215bd..a5dc822 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -45,7 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
 
 static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
-static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
+static int destroy_queues_cpsch(struct device_queue_manager *dqm,
+				bool preempt_static_queues, bool lock);
 
 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 					struct queue *q,
@@ -775,7 +776,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
 
 	BUG_ON(!dqm);
 
-	destroy_queues_cpsch(dqm, true);
+	destroy_queues_cpsch(dqm, true, true);
 
 	list_for_each_entry(node, &dqm->queues, list) {
 		pdd = qpd_to_pdd(node->qpd);
@@ -829,7 +830,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
 	pr_debug("kfd: In %s\n", __func__);
 
 	mutex_lock(&dqm->lock);
-	destroy_queues_cpsch(dqm, false);
+	/* here we actually preempt the DIQ */
+	destroy_queues_cpsch(dqm, true, false);
 	list_del(&kq->list);
 	dqm->queue_count--;
 	qpd->is_debug = false;
@@ -935,13 +937,15 @@ static int destroy_sdma_queues(struct device_queue_manager *dqm,
 				unsigned int sdma_engine)
 {
 	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
-			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
+			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
 			sdma_engine);
 }
 
-static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
+static int destroy_queues_cpsch(struct device_queue_manager *dqm,
+				bool preempt_static_queues, bool lock)
 {
 	int retval;
+	enum kfd_preempt_type_filter preempt_type;
 
 	BUG_ON(!dqm);
 
@@ -960,8 +964,12 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
 		destroy_sdma_queues(dqm, 1);
 	}
 
+	preempt_type = preempt_static_queues ?
+			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
+			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;
+
 	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
-			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
+			preempt_type, 0, false, 0);
 	if (retval != 0)
 		goto out;
 
@@ -989,7 +997,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
 	if (lock)
 		mutex_lock(&dqm->lock);
 
-	retval = destroy_queues_cpsch(dqm, false);
+	retval = destroy_queues_cpsch(dqm, false, false);
 	if (retval != 0) {
 		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
 		goto out;
@@ -1024,13 +1032,27 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 {
 	int retval;
 	struct mqd_manager *mqd;
+	bool preempt_all_queues;
 
 	BUG_ON(!dqm || !qpd || !q);
 
+	preempt_all_queues = false;
+
 	retval = 0;
 
 	/* remove queue from list to prevent rescheduling after preemption */
 	mutex_lock(&dqm->lock);
+
+	if (qpd->is_debug) {
+		/*
+		 * error, currently we do not allow to destroy a queue
+		 * of a currently debugged process
+		 */
+		retval = -EBUSY;
+		goto failed_try_destroy_debugged_queue;
+
+	}
+
 	mqd = dqm->ops.get_mqd_manager(dqm,
 			get_mqd_type_from_queue_type(q->properties.type));
 	if (!mqd) {
@@ -1062,6 +1084,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 	return 0;
 
 failed:
+failed_try_destroy_debugged_queue:
+
 	mutex_unlock(&dqm->lock);
 	return retval;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 57278e2..ec4036a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -88,9 +88,11 @@ struct device_queue_manager_ops {
 				struct queue *q,
 				struct qcm_process_device *qpd,
 				int *allocate_vmid);
+
 	int	(*destroy_queue)(struct device_queue_manager *dqm,
 				struct qcm_process_device *qpd,
 				struct queue *q);
+
 	int	(*update_queue)(struct device_queue_manager *dqm,
 				struct queue *q);
 
@@ -100,8 +102,10 @@ struct device_queue_manager_ops {
 
 	int	(*register_process)(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
+
 	int	(*unregister_process)(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
+
 	int	(*initialize)(struct device_queue_manager *dqm);
 	int	(*start)(struct device_queue_manager *dqm);
 	int	(*stop)(struct device_queue_manager *dqm);
@@ -109,9 +113,11 @@ struct device_queue_manager_ops {
 	int	(*create_kernel_queue)(struct device_queue_manager *dqm,
 					struct kernel_queue *kq,
 					struct qcm_process_device *qpd);
+
 	void	(*destroy_kernel_queue)(struct device_queue_manager *dqm,
 					struct kernel_queue *kq,
 					struct qcm_process_device *qpd);
+
 	bool	(*set_cache_memory_policy)(struct device_queue_manager *dqm,
 					   struct qcm_process_device *qpd,
 					   enum cache_policy default_policy,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index e2533d8..99b6d28 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -163,7 +163,7 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
 	num_queues = 0;
 	list_for_each_entry(cur, &qpd->queues_list, list)
 		num_queues++;
-	packet->bitfields10.num_queues = num_queues;
+	packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues;
 
 	packet->sh_mem_config = qpd->sh_mem_config;
 	packet->sh_mem_bases = qpd->sh_mem_bases;
@@ -177,9 +177,10 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
 }
 
 static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
-				struct queue *q)
+				struct queue *q, bool is_static)
 {
 	struct pm4_map_queues *packet;
+	bool use_static = is_static;
 
 	BUG_ON(!pm || !buffer || !q);
 
@@ -209,6 +210,7 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
 	case KFD_QUEUE_TYPE_SDMA:
 		packet->bitfields2.engine_sel =
 				engine_sel__mes_map_queues__sdma0;
+		use_static = false; /* no static queues under SDMA */
 		break;
 	default:
 		BUG();
@@ -218,6 +220,9 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
 	packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset =
 			q->properties.doorbell_off;
 
+	packet->mes_map_queues_ordinals[0].bitfields3.is_static =
+			(use_static == true) ? 1 : 0;
+
 	packet->mes_map_queues_ordinals[0].mqd_addr_lo =
 			lower_32_bits(q->gart_mqd_addr);
 
@@ -271,9 +276,11 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 			pm_release_ib(pm);
 			return -ENOMEM;
 		}
+
 		retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
 		if (retval != 0)
 			return retval;
+
 		proccesses_mapped++;
 		inc_wptr(&rl_wptr, sizeof(struct pm4_map_process),
 				alloc_size_bytes);
@@ -281,23 +288,36 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
 			if (kq->queue->properties.is_active != true)
 				continue;
+
+			pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
+				kq->queue->queue, qpd->is_debug);
+
 			retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
-							kq->queue);
+						kq->queue, qpd->is_debug);
 			if (retval != 0)
 				return retval;
-			inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
-					alloc_size_bytes);
+
+			inc_wptr(&rl_wptr,
+				sizeof(struct pm4_map_queues),
+				alloc_size_bytes);
 		}
 
 		list_for_each_entry(q, &qpd->queues_list, list) {
 			if (q->properties.is_active != true)
 				continue;
-			retval = pm_create_map_queue(pm,
-						&rl_buffer[rl_wptr], q);
+
+			pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
+				q->queue, qpd->is_debug);
+
+			retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
+						q,  qpd->is_debug);
+
 			if (retval != 0)
 				return retval;
-			inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
-					alloc_size_bytes);
+
+			inc_wptr(&rl_wptr,
+				sizeof(struct pm4_map_queues),
+				alloc_size_bytes);
 		}
 	}
 
@@ -488,7 +508,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 
 	packet = (struct pm4_unmap_queues *)buffer;
 	memset(buffer, 0, sizeof(struct pm4_unmap_queues));
-
+	pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n",
+		mode, reset, type);
 	packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES,
 					sizeof(struct pm4_unmap_queues));
 	switch (type) {
@@ -529,6 +550,11 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 		packet->bitfields2.queue_sel =
 				queue_sel__mes_unmap_queues__perform_request_on_all_active_queues;
 		break;
+	case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES:
+		/* in this case, we do not preempt static queues */
+		packet->bitfields2.queue_sel =
+				queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only;
+		break;
 	default:
 		BUG();
 		break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
index 071ad57..5b393f3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
@@ -237,7 +237,8 @@ struct pm4_map_queues {
 	struct {
 		union {
 			struct {
-				uint32_t reserved5:2;
+				uint32_t is_static:1;
+				uint32_t reserved5:1;
 				uint32_t doorbell_offset:21;
 				uint32_t reserved6:3;
 				uint32_t queue:6;
@@ -328,7 +329,8 @@ enum unmap_queues_action_enum {
 enum unmap_queues_queue_sel_enum {
 	queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
 	queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
-	queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2
+	queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2,
+	queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3
 };
 
 enum unmap_queues_engine_sel_enum {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 8fd6fb6..8e99d2b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -128,6 +128,7 @@ struct kfd_device_info {
 	unsigned int asic_family;
 	const struct kfd_event_interrupt_class *event_interrupt_class;
 	unsigned int max_pasid_bits;
+	unsigned int max_no_of_hqd;
 	size_t ih_ring_entry_size;
 	uint8_t num_of_watch_points;
 	uint16_t mqd_size_aligned;
@@ -230,6 +231,7 @@ struct device *kfd_chardev(void);
 enum kfd_preempt_type_filter {
 	KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE,
 	KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES,
+	KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES,
 	KFD_PREEMPT_TYPE_FILTER_BY_PASID
 };
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 530b82c..85b7fec 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -158,6 +158,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	struct queue *q;
 	struct process_queue_node *pqn;
 	struct kernel_queue *kq;
+	int num_queues = 0;
+	struct queue *cur;
 
 	BUG_ON(!pqm || !dev || !properties || !qid);
 
@@ -172,6 +174,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 		return -1;
 	}
 
+	/*
+	 * for debug process, verify that it is within the static queues limit
+	 * currently limit is set to half of the total avail HQD slots
+	 * If we are just about to create DIQ, the is_debug flag is not set yet
+	 * Hence we also check the type as well
+	 */
+	if ((pdd->qpd.is_debug) ||
+		(type == KFD_QUEUE_TYPE_DIQ)) {
+		list_for_each_entry(cur, &pdd->qpd.queues_list, list)
+			num_queues++;
+		if (num_queues >= dev->device_info->max_no_of_hqd/2)
+			return (-ENOSPC);
+	}
+
 	retval = find_available_queue_slot(pqm, qid);
 	if (retval != 0)
 		return retval;
-- 
2.1.0



More information about the dri-devel mailing list