[PATCH 1/2] drm/amdgpu: Handle xgmi device removal and add reset wq.

Andrey Grodzovsky andrey.grodzovsky at amd.com
Thu Nov 29 20:36:55 UTC 2018


An XGMI hive has some resources allocated on device init which
need to be deallocated when the device is unregistered.

Add a per-hive wq to allow all the nodes in the hive to run resets
concurrently - this should speed up the total reset time to avoid
breaching the PSP FW timeout.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 24 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h |  3 +++
 2 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index fb37e69..9ac2dc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -61,6 +61,8 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
 	INIT_LIST_HEAD(&tmp->device_list);
 	mutex_init(&tmp->hive_lock);
 
+	tmp->reset_queue = alloc_workqueue("xgmi-hive", WQ_UNBOUND | WQ_HIGHPRI, 0);
+
 	return tmp;
 }
 
@@ -135,3 +137,25 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 	mutex_unlock(&xgmi_mutex);
 	return ret;
 }
+
+void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
+{
+	struct amdgpu_hive_info *hive;
+
+	if ((adev->asic_type < CHIP_VEGA20) || (adev->flags & AMD_IS_APU))
+		return;
+
+	mutex_lock(&xgmi_mutex);
+
+	hive = amdgpu_get_xgmi_hive(adev);
+	if (!hive)
+		goto exit;
+	/* Pre-decrement so teardown runs when the count drops to zero. */
+	if (!(--hive->number_devices)) {
+		mutex_destroy(&hive->hive_lock);
+		destroy_workqueue(hive->reset_queue);
+	}
+
+exit:
+	mutex_unlock(&xgmi_mutex);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 6335bfd..285ab93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -30,10 +30,13 @@ struct amdgpu_hive_info {
 	struct psp_xgmi_topology_info	topology_info;
 	int number_devices;
 	struct mutex hive_lock;
+	/* hive members reset wq */
+	struct workqueue_struct *reset_queue;
 };
 
 struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
 int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
+void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
 
 #endif
-- 
2.7.4



More information about the amd-gfx mailing list