[PATCH v2] drm/amdkfd: To fix sdma page fault issue for GC 11.x

Zhang, Yifan Yifan1.Zhang at amd.com
Fri Feb 10 03:18:02 UTC 2023


[Public]

This patch is :

Reviewed-by: Yifan Zhang <yifan1.zhang at amd.com>

-----Original Message-----
From: Ji, Ruili <Ruili.Ji at amd.com> 
Sent: Thursday, February 9, 2023 2:18 PM
To: amd-gfx at lists.freedesktop.org
Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Deucher, Alexander <Alexander.Deucher at amd.com>; Liu, Aaron <Aaron.Liu at amd.com>; Zhang, Yifan <Yifan1.Zhang at amd.com>; Ji, Ruili <Ruili.Ji at amd.com>
Subject: [PATCH v2] drm/amdkfd: To fix sdma page fault issue for GC 11.x

From: Ruili Ji <ruiliji2 at amd.com>

For the MQD memory, KMD would always allocate 4K memory, and mes scheduler would write to the end of MQD for unmap flag.

Signed-off-by: Ruili Ji <ruiliji2 at amd.com>
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c |  5 +++--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  | 15 ++++++++++++++-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c06ada0844ba..7a95698d83f7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2373,7 +2373,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 	if (init_mqd_managers(dqm))
 		goto out_free;
 
-	if (allocate_hiq_sdma_mqd(dqm)) {
+	if (!dev->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
 		pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
 		goto out_free;
 	}
@@ -2397,7 +2397,8 @@ static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,  void device_queue_manager_uninit(struct device_queue_manager *dqm)  {
 	dqm->ops.uninitialize(dqm);
-	deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
+	if (!dqm->dev->shared_resources.enable_mes)
+		deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
 	kfree(dqm);
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 4f6390f3236e..4a9af800b1f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -308,11 +308,16 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
 		struct queue_properties *q)
 {
 	struct v11_sdma_mqd *m;
+	int size;
 
 	m = (struct v11_sdma_mqd *) mqd_mem_obj->cpu_ptr;
 
-	memset(m, 0, sizeof(struct v11_sdma_mqd));
+	if (mm->dev->shared_resources.enable_mes)
+		size = PAGE_SIZE;
+	else
+		size = sizeof(struct v11_sdma_mqd);
 
+	memset(m, 0, size);
 	*mqd = m;
 	if (gart_addr)
 		*gart_addr = mqd_mem_obj->gpu_addr;
@@ -443,6 +448,14 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,  #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;  #endif
+		/*
+		 * To allocate SDMA MQDs by generic functions
+		 * when MES is enabled.
+		 */
+		if (dev->shared_resources.enable_mes) {
+			mqd->allocate_mqd = allocate_mqd;
+			mqd->free_mqd = kfd_free_mqd_cp;
+		}
 		pr_debug("%s@%i\n", __func__, __LINE__);
 		break;
 	default:
--
2.25.1


More information about the amd-gfx mailing list