[PATCH] drm/amdkfd: To fix sdma page fault issue for GC 11.x

Liu, Aaron Aaron.Liu at amd.com
Mon Feb 6 13:18:06 UTC 2023


[AMD Official Use Only - General]

Reviewed-by: Aaron Liu <aaron.liu at amd.com>

> -----Original Message-----
> From: Ji, Ruili <Ruili.Ji at amd.com>
> Sent: Monday, February 6, 2023 8:58 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Kuehling, Felix
> <Felix.Kuehling at amd.com>; Liu, Aaron <Aaron.Liu at amd.com>; Zhang, Yifan
> <Yifan1.Zhang at amd.com>; Ji, Ruili <Ruili.Ji at amd.com>
> Subject: [PATCH] drm/amdkfd: To fix sdma page fault issue for GC 11.x
>
> From: Ruili Ji <ruiliji2 at amd.com>
>
> For MQD memory, the KMD always allocates 4K of memory, and the MES
> scheduler writes to the end of the MQD for the unmap flag.
>
> Signed-off-by: Ruili Ji <ruiliji2 at amd.com>
> ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 20 +++++++++++++++----
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  | 12 +++++++++--
>  2 files changed, 26 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index c06ada0844ba..d682e6921438 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -2244,10 +2244,22 @@ static int allocate_hiq_sdma_mqd(struct
> device_queue_manager *dqm)
>       int retval;
>       struct kfd_dev *dev = dqm->dev;
>       struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
> -     uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]-
> >mqd_size *
> -             get_num_all_sdma_engines(dqm) *
> -             dev->device_info.num_sdma_queues_per_engine +
> -             dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
> +     uint32_t size;
> +     /*
> +      * MES writes to areas beyond the MQD size, so allocate
> +      * 1 PAGE_SIZE of memory for the MQD if MES is enabled.
> +      */
> +     if (dev->shared_resources.enable_mes) {
> +             size = PAGE_SIZE *
> +                     get_num_all_sdma_engines(dqm) *
> +                     dev->device_info.num_sdma_queues_per_engine +
> +                     dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
> +     } else {
> +             size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size
> *
> +                     get_num_all_sdma_engines(dqm) *
> +                     dev->device_info.num_sdma_queues_per_engine +
> +                     dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
> +     }
>
>       retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
>               &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), diff --git
> a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> index 623ccd227b7d..ea176a515898 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> @@ -66,15 +66,23 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct
> kfd_dev *dev,  {
>       struct kfd_mem_obj *mqd_mem_obj = NULL;
>       uint64_t offset;
> +     uint32_t size;
>
>       mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
>       if (!mqd_mem_obj)
>               return NULL;
> +     /*
> +      * MES writes to areas beyond the MQD size, so allocate
> +      * 1 PAGE_SIZE of memory for the MQD if MES is enabled.
> +      */
> +     if (dev->shared_resources.enable_mes)
> +             size = PAGE_SIZE;
> +     else
> +             size = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]-
> >mqd_size;
>
>       offset = (q->sdma_engine_id *
>               dev->device_info.num_sdma_queues_per_engine +
> -             q->sdma_queue_id) *
> -             dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
> +             q->sdma_queue_id) * size;
>
>       offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
>
> --
> 2.25.1



More information about the amd-gfx mailing list