[PATCH 1/2] drm/amdgpu: fix VRAM partially encroached issue in GDDR6 memory training(V2)

Yin, Tianci (Rico) Tianci.Yin at amd.com
Tue Jan 21 10:21:14 UTC 2020


[AMD Official Use Only - Internal Distribution Only]

Thanks very much Christian!
________________________________
From: Koenig, Christian <Christian.Koenig at amd.com>
Sent: Tuesday, January 21, 2020 4:52
To: Yin, Tianci (Rico) <Tianci.Yin at amd.com>; amd-gfx at lists.freedesktop.org <amd-gfx at lists.freedesktop.org>
Cc: Tuikov, Luben <Luben.Tuikov at amd.com>; Deucher, Alexander <Alexander.Deucher at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>; Xu, Feifei <Feifei.Xu at amd.com>; Yuan, Xiaojie <Xiaojie.Yuan at amd.com>; Long, Gang <Gang.Long at amd.com>; Wang, Kevin(Yang) <Kevin1.Wang at amd.com>
Subject: Re: [PATCH 1/2] drm/amdgpu: fix VRAM partially encroached issue in GDDR6 memory training(V2)

Am 21.01.20 um 03:22 schrieb Tianci Yin:
> From: "Tianci.Yin" <tianci.yin at amd.com>
>
> [why]
> In GDDR6 BIST training, a certain amount of bottom VRAM will be encroached by
> UMC, which causes problems (such as GTT corruption and page faults).
>
> [how]
> Save the content of this bottom VRAM to system memory before training, and
> restore it after training to avoid VRAM corruption.
>
> Change-Id: I04a8a6e8e63b3619f7c693fe67883b229cbf3c53
> Signed-off-by: Tianci.Yin <tianci.yin at amd.com>

Acked-by: Christian König <christian.koenig at amd.com> for this one and
Reviewed-by: Christian König <christian.koenig at amd.com> for patch #2.

Regards,
Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  2 ++
>   drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 36 ++++++++++++++++++++++---
>   2 files changed, 35 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> index 3265487b859f..611021514c52 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> @@ -172,6 +172,8 @@ struct psp_dtm_context {
>   #define MEM_TRAIN_SYSTEM_SIGNATURE          0x54534942
>   #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES       0x1000
>   #define GDDR6_MEM_TRAINING_OFFSET           0x8000
> +/*Define the VRAM size that will be encroached by BIST training.*/
> +#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE   0x2000000
>
>   enum psp_memory_training_init_flag {
>        PSP_MEM_TRAIN_NOT_SUPPORT       = 0x0,
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> index 685dd9754c67..ac173d2eb809 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> @@ -971,10 +971,13 @@ static int psp_v11_0_memory_training_init(struct psp_context *psp)
>    */
>   static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
>   {
> -     int ret;
> -     uint32_t p2c_header[4];
>        struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
>        uint32_t *pcache = (uint32_t*)ctx->sys_cache;
> +     struct amdgpu_device *adev = psp->adev;
> +     uint32_t p2c_header[4];
> +     uint32_t sz;
> +     void *buf;
> +     int ret;
>
>        if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {
>                DRM_DEBUG("Memory training is not supported.\n");
> @@ -989,7 +992,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
>                return 0;
>        }
>
> -     amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
> +     amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
>        DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
>                  pcache[0], pcache[1], pcache[2], pcache[3],
>                  p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
> @@ -1026,11 +1029,38 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
>        DRM_DEBUG("Memory training ops:%x.\n", ops);
>
>        if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
> +             /*
> +              * Long training will encroach a certain amount of bottom VRAM,
> +              * saving the content of this bottom VRAM to system memory
> +              * before training, and restoring it after training to avoid
> +              * VRAM corruption.
> +              */
> +             sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
> +
> +             if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
> +                     DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
> +                               adev->gmc.visible_vram_size,
> +                               adev->mman.aper_base_kaddr);
> +                     return -EINVAL;
> +             }
> +
> +             buf = vmalloc(sz);
> +             if (!buf) {
> +                     DRM_ERROR("failed to allocate system memory.\n");
> +                     return -ENOMEM;
> +             }
> +
> +             memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
>                ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
>                if (ret) {
>                        DRM_ERROR("Send long training msg failed.\n");
> +                     vfree(buf);
>                        return ret;
>                }
> +
> +             memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
> +             adev->nbio.funcs->hdp_flush(adev, NULL);
> +             vfree(buf);
>        }
>
>        if (ops & PSP_MEM_TRAIN_SAVE) {

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20200121/4e7849cf/attachment.htm>


More information about the amd-gfx mailing list