[PATCH 1/4] drm/amdkfd: Fix sdma queue allocate race condition
Zeng, Oak
Oak.Zeng at amd.com
Fri May 31 21:19:04 UTC 2019
SDMA queue allocation requires the dqm lock as it modify
the global dqm members. Introduce functions to allocate/deallocate
in locked/unlocked circumstance.
Change-Id: Id3084524c5f65d9629b12cf6b4862a7516945cb1
Signed-off-by: Oak Zeng <Oak.Zeng at amd.com>
---
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 46 ++++++++++++++++------
1 file changed, 35 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index ece35c7..1f707bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -61,6 +61,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
struct queue *q);
+static void deallocate_sdma_queue_locked(struct device_queue_manager *dqm,
+ struct queue *q);
static void kfd_process_hw_exception(struct work_struct *work);
@@ -446,10 +448,10 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
deallocate_hqd(dqm, q);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
- deallocate_sdma_queue(dqm, q);
+ deallocate_sdma_queue_locked(dqm, q);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
dqm->xgmi_sdma_queue_count--;
- deallocate_sdma_queue(dqm, q);
+ deallocate_sdma_queue_locked(dqm, q);
} else {
pr_debug("q->properties.type %d is invalid\n",
q->properties.type);
@@ -922,8 +924,10 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
if (dqm->sdma_bitmap == 0)
return -ENOMEM;
+ dqm_lock(dqm);
bit = __ffs64(dqm->sdma_bitmap);
dqm->sdma_bitmap &= ~(1ULL << bit);
+ dqm_unlock(dqm);
q->sdma_id = bit;
q->properties.sdma_engine_id = q->sdma_id %
get_num_sdma_engines(dqm);
@@ -932,8 +936,10 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
if (dqm->xgmi_sdma_bitmap == 0)
return -ENOMEM;
+ dqm_lock(dqm);
bit = __ffs64(dqm->xgmi_sdma_bitmap);
dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
+ dqm_unlock(dqm);
q->sdma_id = bit;
/* sdma_engine_id is sdma id including
* both PCIe-optimized SDMAs and XGMI-
@@ -953,17 +959,35 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
return 0;
}
+static void deallocate_sdma_queue_locked(struct device_queue_manager *dqm,
+ struct queue *q)
+{
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+ if (q->sdma_id >= get_num_sdma_queues(dqm))
+ return;
+ dqm->sdma_bitmap |= (1ULL << q->sdma_id);
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
+ return;
+ dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
+ }
+}
+
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
struct queue *q)
{
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
if (q->sdma_id >= get_num_sdma_queues(dqm))
return;
+ dqm_lock(dqm);
dqm->sdma_bitmap |= (1ULL << q->sdma_id);
+ dqm_unlock(dqm);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
return;
+ dqm_lock(dqm);
dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
+ dqm_unlock(dqm);
}
}
@@ -982,7 +1006,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
retval = allocate_doorbell(qpd, q);
if (retval)
- goto out_deallocate_sdma_queue;
+ goto out_deallocate_sdma_queue_locked;
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
@@ -1001,8 +1025,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
deallocate_doorbell(qpd, q);
-out_deallocate_sdma_queue:
- deallocate_sdma_queue(dqm, q);
+out_deallocate_sdma_queue_locked:
+ deallocate_sdma_queue_locked(dqm, q);
return retval;
}
@@ -1194,7 +1218,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
retval = allocate_doorbell(qpd, q);
if (retval)
- goto out_deallocate_sdma_queue;
+ goto out_deallocate_sdma_queue_locked;
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
@@ -1242,7 +1266,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
out_deallocate_doorbell:
deallocate_doorbell(qpd, q);
-out_deallocate_sdma_queue:
+out_deallocate_sdma_queue_locked:
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
@@ -1396,10 +1420,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
- deallocate_sdma_queue(dqm, q);
+ deallocate_sdma_queue_locked(dqm, q);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
dqm->xgmi_sdma_queue_count--;
- deallocate_sdma_queue(dqm, q);
+ deallocate_sdma_queue_locked(dqm, q);
}
list_del(&q->list);
@@ -1625,10 +1649,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
list_for_each_entry(q, &qpd->queues_list, list) {
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
- deallocate_sdma_queue(dqm, q);
+ deallocate_sdma_queue_locked(dqm, q);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
dqm->xgmi_sdma_queue_count--;
- deallocate_sdma_queue(dqm, q);
+ deallocate_sdma_queue_locked(dqm, q);
}
if (q->properties.is_active)
--
2.7.4
More information about the amd-gfx
mailing list