[PATCH 2/2] drm/amdgpu: fix VRAM partially encroached issue in GDDR6 memory training
Christian König
ckoenig.leichtzumerken at gmail.com
Mon Jan 20 11:58:35 UTC 2020
Am 20.01.20 um 12:08 schrieb Tianci Yin:
> From: "Tianci.Yin" <tianci.yin at amd.com>
>
> [why]
> In GDDR6 BIST training, a certain mount of bottom VRAM will be encroached by
> UMC, that causes problems(like GTT corrupted and page fault observed).
>
> [how]
> Saving the content of this bottom VRAM to system memory before training, and
> restoring it after training to avoid VRAM corruption.
You need to re-order the patches, this one should come first and the
other one last.
One more style nit pick below.
>
> Change-Id: I04a8a6e8e63b3619f7c693fe67883b229cbf3c53
> Signed-off-by: Tianci.Yin <tianci.yin at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 2 ++
> drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 32 ++++++++++++++++++++++++-
> 2 files changed, 33 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> index 3265487b859f..611021514c52 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> @@ -172,6 +172,8 @@ struct psp_dtm_context {
> #define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942
> #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000
> #define GDDR6_MEM_TRAINING_OFFSET 0x8000
> +/*Define the VRAM size that will be encroached by BIST training.*/
> +#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x2000000
>
> enum psp_memory_training_init_flag {
> PSP_MEM_TRAIN_NOT_SUPPORT = 0x0,
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> index 685dd9754c67..51011b661ba8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> @@ -972,7 +972,10 @@ static int psp_v11_0_memory_training_init(struct psp_context *psp)
> static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
> {
> int ret;
> + void *buf;
> + uint32_t sz;
> uint32_t p2c_header[4];
> + struct amdgpu_device *adev = psp->adev;
> struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
> uint32_t *pcache = (uint32_t*)ctx->sys_cache;
In general it is preferred to order the lines in reverse xmas tree.
E.g. long lines with pre-initializes variables such as adev, ctx, pcache
first. And temporary stuff like i, ret, buf etc last.
Apart from that this looks good to me,
Christian.
>
> @@ -989,7 +992,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
> return 0;
> }
>
> - amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
> + amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
> DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
> pcache[0], pcache[1], pcache[2], pcache[3],
> p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
> @@ -1026,11 +1029,38 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
> DRM_DEBUG("Memory training ops:%x.\n", ops);
>
> if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
> + /*
> + * Long traing will encroach certain mount of bottom VRAM,
> + * saving the content of this bottom VRAM to system memory
> + * before training, and restoring it after training to avoid
> + * VRAM corruption.
> + */
> + sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
> +
> + if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
> + DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
> + adev->gmc.visible_vram_size,
> + adev->mman.aper_base_kaddr);
> + return -EINVAL;
> + }
> +
> + buf = vmalloc(sz);
> + if (!buf) {
> + DRM_ERROR("failed to allocate system memory.\n");
> + return -ENOMEM;
> + }
> +
> + memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
> ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
> if (ret) {
> DRM_ERROR("Send long training msg failed.\n");
> + vfree(buf);
> return ret;
> }
> +
> + memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
> + adev->nbio.funcs->hdp_flush(adev, NULL);
> + vfree(buf);
> }
>
> if (ops & PSP_MEM_TRAIN_SAVE) {
More information about the amd-gfx
mailing list