[PATCH 2/2] drm/amdgpu: fix VRAM partially encroached issue in GDDR6 memory training

Tianci Yin tianci.yin at amd.com
Mon Jan 20 11:08:22 UTC 2020


From: "Tianci.Yin" <tianci.yin at amd.com>

[why]
In GDDR6 BIST training, a certain mount of bottom VRAM will be encroached by
UMC, that causes problems(like GTT corrupted and page fault observed).

[how]
Saving the content of this bottom VRAM to system memory before training, and
restoring it after training to avoid VRAM corruption.

Change-Id: I04a8a6e8e63b3619f7c693fe67883b229cbf3c53
Signed-off-by: Tianci.Yin <tianci.yin at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 32 ++++++++++++++++++++++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 3265487b859f..611021514c52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -172,6 +172,8 @@ struct psp_dtm_context {
 #define MEM_TRAIN_SYSTEM_SIGNATURE		0x54534942
 #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES	0x1000
 #define GDDR6_MEM_TRAINING_OFFSET		0x8000
+/*Define the VRAM size that will be encroached by BIST training.*/
+#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE	0x2000000
 
 enum psp_memory_training_init_flag {
 	PSP_MEM_TRAIN_NOT_SUPPORT	= 0x0,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 685dd9754c67..51011b661ba8 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -972,7 +972,10 @@ static int psp_v11_0_memory_training_init(struct psp_context *psp)
 static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
 {
 	int ret;
+	void *buf;
+	uint32_t sz;
 	uint32_t p2c_header[4];
+	struct amdgpu_device *adev = psp->adev;
 	struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
 	uint32_t *pcache = (uint32_t*)ctx->sys_cache;
 
@@ -989,7 +992,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
 		return 0;
 	}
 
-	amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
+	amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
 	DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
 		  pcache[0], pcache[1], pcache[2], pcache[3],
 		  p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
@@ -1026,11 +1029,38 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
 	DRM_DEBUG("Memory training ops:%x.\n", ops);
 
 	if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+		/*
+		 * Long traing will encroach certain mount of bottom VRAM,
+		 * saving the content of this bottom VRAM to system memory
+		 * before training, and restoring it after training to avoid
+		 * VRAM corruption.
+		 */
+		sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+
+		if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
+			DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
+				  adev->gmc.visible_vram_size,
+				  adev->mman.aper_base_kaddr);
+			return -EINVAL;
+		}
+
+		buf = vmalloc(sz);
+		if (!buf) {
+			DRM_ERROR("failed to allocate system memory.\n");
+			return -ENOMEM;
+		}
+
+		memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
 		ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
 		if (ret) {
 			DRM_ERROR("Send long training msg failed.\n");
+			vfree(buf);
 			return ret;
 		}
+
+		memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
+		adev->nbio.funcs->hdp_flush(adev, NULL);
+		vfree(buf);
 	}
 
 	if (ops & PSP_MEM_TRAIN_SAVE) {
-- 
2.17.1



More information about the amd-gfx mailing list