[PATCH 5/9] drm/amdkfd: Improve process termination handling in the queue manager
Felix Kuehling
Felix.Kuehling at amd.com
Wed Sep 27 04:09:52 UTC 2017
Separate device queue termination from process queue manager
termination. Unmap all queues at once instead of one at a time.
Unmap device queues before the PASID is unbound, in the
kfd_process_iommu_unbind_callback.
When resetting wavefronts in non-HWS mode, do it before the VMID is
released.
Signed-off-by: Ben Goz <ben.goz at amd.com>
Signed-off-by: shaoyun liu <shaoyun.liu at amd.com>
Signed-off-by: Amber Lin <Amber.Lin at amd.com>
Signed-off-by: Yong Zhao <Yong.Zhao at amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
---
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 187 ++++++++++++++++-----
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 5 +
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 18 +-
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 36 ++--
.../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 37 ++--
5 files changed, 200 insertions(+), 83 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 0f2a756..d0e7288 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -295,65 +295,73 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
return retval;
}
-static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
+/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
+ * to avoid asynchronized access
+ */
+static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
{
int retval;
struct mqd_manager *mqd;
- retval = 0;
-
- mutex_lock(&dqm->lock);
+ mqd = dqm->ops.get_mqd_manager(dqm,
+ get_mqd_type_from_queue_type(q->properties.type));
+ if (!mqd)
+ return -ENOMEM;
- if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
- mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
- if (mqd == NULL) {
- retval = -ENOMEM;
- goto out;
- }
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
deallocate_hqd(dqm, q);
- } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
- mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
- if (mqd == NULL) {
- retval = -ENOMEM;
- goto out;
- }
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
deallocate_sdma_queue(dqm, q->sdma_id);
} else {
pr_debug("q->properties.type %d is invalid\n",
q->properties.type);
- retval = -EINVAL;
- goto out;
+ return -EINVAL;
}
+ dqm->total_queue_count--;
retval = mqd->destroy_mqd(mqd, q->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
KFD_UNMAP_LATENCY_MS,
q->pipe, q->queue);
-
- if (retval)
- goto out;
+ if (retval == -ETIME)
+ qpd->reset_wavefronts = true;
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
list_del(&q->list);
- if (list_empty(&qpd->queues_list))
+ if (list_empty(&qpd->queues_list)) {
+ if (qpd->reset_wavefronts) {
+ pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
+ dqm->dev);
+ /* dbgdev_wave_reset_wavefronts has to be called before
+ * deallocate_vmid(), i.e. when vmid is still in use.
+ */
+ dbgdev_wave_reset_wavefronts(dqm->dev,
+ qpd->pqm->process);
+ qpd->reset_wavefronts = false;
+ }
+
deallocate_vmid(dqm, qpd, q);
+ }
if (q->properties.is_active)
dqm->queue_count--;
- /*
- * Unconditionally decrement this counter, regardless of the queue's
- * type
- */
- dqm->total_queue_count--;
- pr_debug("Total of %d queues are accountable so far\n",
- dqm->total_queue_count);
+ return retval;
+}
-out:
+static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ struct queue *q)
+{
+ int retval;
+
+ mutex_lock(&dqm->lock);
+ retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
mutex_unlock(&dqm->lock);
+
return retval;
}
@@ -919,10 +927,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
uint32_t filter_param)
{
- int retval;
- struct kfd_process_device *pdd;
-
- retval = 0;
+ int retval = 0;
if (!dqm->active_runlist)
return retval;
@@ -946,12 +951,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
/* should be timed out */
retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
- if (retval) {
- pdd = kfd_get_process_device_data(dqm->dev,
- kfd_get_process(current));
- pdd->reset_wavefronts = true;
+ if (retval)
return retval;
- }
+
pm_release_ib(&dqm->packets);
dqm->active_runlist = false;
@@ -1017,7 +1019,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
if (q->properties.is_active)
dqm->queue_count--;
- execute_queues_cpsch(dqm, false);
+ retval = execute_queues_cpsch(dqm, false);
+ if (retval == -ETIME)
+ qpd->reset_wavefronts = true;
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
@@ -1107,6 +1111,107 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
return retval;
}
+static int process_termination_nocpsch(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ struct queue *q, *next;
+ struct device_process_node *cur, *next_dpn;
+ int retval = 0;
+
+ mutex_lock(&dqm->lock);
+
+ /* Clear all user mode queues */
+ list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
+ int ret;
+
+ ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
+ if (ret)
+ retval = ret;
+ }
+
+ /* Unregister process */
+ list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
+ if (qpd == cur->qpd) {
+ list_del(&cur->list);
+ kfree(cur);
+ dqm->processes_count--;
+ break;
+ }
+ }
+
+ mutex_unlock(&dqm->lock);
+ return retval;
+}
+
+
+static int process_termination_cpsch(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ int retval;
+ struct queue *q, *next;
+ struct kernel_queue *kq, *kq_next;
+ struct mqd_manager *mqd;
+ struct device_process_node *cur, *next_dpn;
+ bool unmap_static_queues = false;
+
+ retval = 0;
+
+ mutex_lock(&dqm->lock);
+
+ /* Clean all kernel queues */
+ list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
+ list_del(&kq->list);
+ dqm->queue_count--;
+ qpd->is_debug = false;
+ dqm->total_queue_count--;
+ unmap_static_queues = true;
+ }
+
+ /* Clear all user mode queues */
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ dqm->sdma_queue_count--;
+
+ if (q->properties.is_active)
+ dqm->queue_count--;
+
+ dqm->total_queue_count--;
+ }
+
+ /* Unregister process */
+ list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
+ if (qpd == cur->qpd) {
+ list_del(&cur->list);
+ kfree(cur);
+ dqm->processes_count--;
+ break;
+ }
+ }
+
+ retval = execute_queues_cpsch(dqm, unmap_static_queues);
+ if (retval || qpd->reset_wavefronts) {
+ pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
+ dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
+ qpd->reset_wavefronts = false;
+ }
+
+ /* lastly, free mqd resources */
+ list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
+ mqd = dqm->ops.get_mqd_manager(dqm,
+ get_mqd_type_from_queue_type(q->properties.type));
+ if (!mqd) {
+ retval = -ENOMEM;
+ goto out;
+ }
+ list_del(&q->list);
+ mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+ }
+
+out:
+ mutex_unlock(&dqm->lock);
+ return retval;
+}
+
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
struct device_queue_manager *dqm;
@@ -1135,6 +1240,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+ dqm->ops.process_termination = process_termination_cpsch;
break;
case KFD_SCHED_POLICY_NO_HWS:
/* initialize dqm for no cp scheduling */
@@ -1149,6 +1255,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.initialize = initialize_nocpsch;
dqm->ops.uninitialize = uninitialize;
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+ dqm->ops.process_termination = process_termination_nocpsch;
break;
default:
pr_err("Invalid scheduling policy %d\n", sched_policy);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 60d46ce..31c2b1f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -77,6 +77,8 @@ struct device_process_node {
* @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the
* memory apertures.
*
+ * @process_termination: Clears all process queues belongs to that device.
+ *
*/
struct device_queue_manager_ops {
@@ -120,6 +122,9 @@ struct device_queue_manager_ops {
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
+
+ int (*process_termination)(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
};
struct device_queue_manager_asic_ops {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index b0a5bea..b936a12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -424,6 +424,12 @@ struct qcm_process_device {
unsigned int queue_count;
unsigned int vmid;
bool is_debug;
+
+ /* This flag tells if we should reset all wavefronts on
+ * process termination
+ */
+ bool reset_wavefronts;
+
/*
* All the memory management data should be here too
*/
@@ -457,6 +463,8 @@ struct kfd_process_device {
/* The device that owns this data. */
struct kfd_dev *dev;
+ /* The process that owns this kfd_process_device. */
+ struct kfd_process *process;
/* per-process-per device QCM data structure */
struct qcm_process_device qpd;
@@ -472,10 +480,12 @@ struct kfd_process_device {
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
enum kfd_pdd_bound bound;
- /* This flag tells if we should reset all
- * wavefronts on process termination
+ /* Flag used to tell the pdd has dequeued from the dqm.
+ * This is used to prevent dev->dqm->ops.process_termination() from
+ * being called twice when it is already called in IOMMU callback
+ * function.
*/
- bool reset_wavefronts;
+ bool already_dequeued;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -657,6 +667,8 @@ struct process_queue_node {
struct list_head process_queue_list;
};
+void kfd_process_dequeue_from_device(struct kfd_process_device *pdd);
+void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
void pqm_uninit(struct process_queue_manager *pqm);
int pqm_create_queue(struct process_queue_manager *pqm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 0464c31..82ad037 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -171,9 +171,6 @@ static void kfd_process_wq_release(struct work_struct *work)
pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
pdd->dev->id, p->pasid);
- if (pdd->reset_wavefronts)
- dbgdev_wave_reset_wavefronts(pdd->dev, p);
-
if (pdd->bound == PDD_BOUND)
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
@@ -236,24 +233,17 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
mutex_lock(&p->mutex);
- /* In case our notifier is called before IOMMU notifier */
+ kfd_process_dequeue_from_all_devices(p);
pqm_uninit(&p->pqm);
/* Iterate over all process device data structure and check
- * if we should delete debug managers and reset all wavefronts
+ * if we should delete debug managers
*/
- list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list)
if ((pdd->dev->dbgmgr) &&
(pdd->dev->dbgmgr->pasid == p->pasid))
kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
- if (pdd->reset_wavefronts) {
- pr_warn("Resetting all wave fronts\n");
- dbgdev_wave_reset_wavefronts(pdd->dev, p);
- pdd->reset_wavefronts = false;
- }
- }
-
mutex_unlock(&p->mutex);
/*
@@ -362,8 +352,9 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
INIT_LIST_HEAD(&pdd->qpd.queues_list);
INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
pdd->qpd.dqm = dev->dqm;
- pdd->reset_wavefronts = false;
+ pdd->process = p;
pdd->bound = PDD_UNBOUND;
+ pdd->already_dequeued = false;
list_add(&pdd->per_device_list, &p->per_device_data);
}
@@ -492,19 +483,12 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
kfd_dbgmgr_destroy(dev->dbgmgr);
- pqm_uninit(&p->pqm);
-
pdd = kfd_get_process_device_data(dev, p);
-
- if (!pdd) {
- mutex_unlock(&p->mutex);
- return;
- }
-
- if (pdd->reset_wavefronts) {
- dbgdev_wave_reset_wavefronts(pdd->dev, p);
- pdd->reset_wavefronts = false;
- }
+ if (pdd)
+ /* For GPU relying on IOMMU, we need to dequeue here
+ * when PASID is still bound.
+ */
+ kfd_process_dequeue_from_device(pdd);
mutex_unlock(&p->mutex);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 68fe0d9..31ec3ca 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -63,6 +63,25 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
return 0;
}
+void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
+{
+ struct kfd_dev *dev = pdd->dev;
+
+ if (pdd->already_dequeued)
+ return;
+
+ dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
+ pdd->already_dequeued = true;
+}
+
+void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
+{
+ struct kfd_process_device *pdd;
+
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+ kfd_process_dequeue_from_device(pdd);
+}
+
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
{
INIT_LIST_HEAD(&pqm->queues);
@@ -78,21 +97,14 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
void pqm_uninit(struct process_queue_manager *pqm)
{
- int retval;
struct process_queue_node *pqn, *next;
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
- retval = pqm_destroy_queue(
- pqm,
- (pqn->q != NULL) ?
- pqn->q->properties.queue_id :
- pqn->kq->queue->properties.queue_id);
-
- if (retval != 0) {
- pr_err("failed to destroy queue\n");
- return;
- }
+ uninit_queue(pqn->q);
+ list_del(&pqn->process_queue_list);
+ kfree(pqn);
}
+
kfree(pqm->queue_slot_bitmap);
pqm->queue_slot_bitmap = NULL;
}
@@ -290,9 +302,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
if (pqn->q) {
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
- if (retval != 0)
- return retval;
-
uninit_queue(pqn->q);
}
--
2.7.4
More information about the amd-gfx
mailing list