[PATCHv2 1/3] drm/amdgpu: Implement MES Suspend and Resume APIs for GFX11

Alex Deucher alexdeucher at gmail.com
Thu Aug 15 18:15:44 UTC 2024


On Wed, Aug 14, 2024 at 7:28 PM Mukul Joshi <mukul.joshi at amd.com> wrote:
>
> Add implementation for MES Suspend and Resume APIs to unmap/map
> all queues for GFX11. Support for GFX12 will be added when the
> corresponding firmware support is in place.
>
> Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>

Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

> ---
> v1->v2:
> - Add MES FW version check.
> - Update amdgpu_mes_suspend/amdgpu_mes_resume handling.
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 71 +++++++++++++------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  2 +
>  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  | 32 ++++++++++-
>  3 files changed, 69 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index c598c3edff7e..e551943da77a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -501,60 +501,50 @@ int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id)
>
>  int amdgpu_mes_suspend(struct amdgpu_device *adev)
>  {
> -       struct idr *idp;
> -       struct amdgpu_mes_process *process;
> -       struct amdgpu_mes_gang *gang;
>         struct mes_suspend_gang_input input;
> -       int r, pasid;
> +       int r;
> +
> +       if (!amdgpu_mes_suspend_resume_all_supported(adev))
> +               return 0;
> +
> +       memset(&input, 0x0, sizeof(struct mes_suspend_gang_input));
> +       input.suspend_all_gangs = 1;
>
>         /*
>          * Avoid taking any other locks under MES lock to avoid circular
>          * lock dependencies.
>          */
>         amdgpu_mes_lock(&adev->mes);
> -
> -       idp = &adev->mes.pasid_idr;
> -
> -       idr_for_each_entry(idp, process, pasid) {
> -               list_for_each_entry(gang, &process->gang_list, list) {
> -                       r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
> -                       if (r)
> -                               DRM_ERROR("failed to suspend pasid %d gangid %d",
> -                                        pasid, gang->gang_id);
> -               }
> -       }
> -
> +       r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
>         amdgpu_mes_unlock(&adev->mes);
> -       return 0;
> +       if (r)
> +               DRM_ERROR("failed to suspend all gangs");
> +
> +       return r;
>  }
>
>  int amdgpu_mes_resume(struct amdgpu_device *adev)
>  {
> -       struct idr *idp;
> -       struct amdgpu_mes_process *process;
> -       struct amdgpu_mes_gang *gang;
>         struct mes_resume_gang_input input;
> -       int r, pasid;
> +       int r;
> +
> +       if (!amdgpu_mes_suspend_resume_all_supported(adev))
> +               return 0;
> +
> +       memset(&input, 0x0, sizeof(struct mes_resume_gang_input));
> +       input.resume_all_gangs = 1;
>
>         /*
>          * Avoid taking any other locks under MES lock to avoid circular
>          * lock dependencies.
>          */
>         amdgpu_mes_lock(&adev->mes);
> -
> -       idp = &adev->mes.pasid_idr;
> -
> -       idr_for_each_entry(idp, process, pasid) {
> -               list_for_each_entry(gang, &process->gang_list, list) {
> -                       r = adev->mes.funcs->resume_gang(&adev->mes, &input);
> -                       if (r)
> -                               DRM_ERROR("failed to resume pasid %d gangid %d",
> -                                        pasid, gang->gang_id);
> -               }
> -       }
> -
> +       r = adev->mes.funcs->resume_gang(&adev->mes, &input);
>         amdgpu_mes_unlock(&adev->mes);
> -       return 0;
> +       if (r)
> +               DRM_ERROR("failed to resume all gangs");
> +
> +       return r;
>  }
>
>  static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
> @@ -1608,6 +1598,19 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
>         return r;
>  }
>
> +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev)
> +{
> +       uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
> +       bool is_supported = false;
> +
> +       if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
> +           amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) &&
> +           mes_rev >= 0x63)
> +               is_supported = true;
> +
> +       return is_supported;
> +}
> +
>  #if defined(CONFIG_DEBUG_FS)
>
>  static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index 548e724e3a75..b2db62e50454 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -494,4 +494,6 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
>         memalloc_noreclaim_restore(mes->saved_flags);
>         mutex_unlock(&mes->mutex_hidden);
>  }
> +
> +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev);
>  #endif /* __AMDGPU_MES_H__ */
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index c0340ee3dec0..a5c582674db9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -421,13 +421,41 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
>  static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes,
>                                   struct mes_suspend_gang_input *input)
>  {
> -       return 0;
> +       union MESAPI__SUSPEND mes_suspend_gang_pkt;
> +
> +       memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt));
> +
> +       mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
> +       mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND;
> +       mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> +
> +       mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs;
> +       mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr;
> +       mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr;
> +       mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value;
> +
> +       return mes_v11_0_submit_pkt_and_poll_completion(mes,
> +                       &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt),
> +                       offsetof(union MESAPI__SUSPEND, api_status));
>  }
>
>  static int mes_v11_0_resume_gang(struct amdgpu_mes *mes,
>                                  struct mes_resume_gang_input *input)
>  {
> -       return 0;
> +       union MESAPI__RESUME mes_resume_gang_pkt;
> +
> +       memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt));
> +
> +       mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
> +       mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME;
> +       mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> +
> +       mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs;
> +       mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr;
> +
> +       return mes_v11_0_submit_pkt_and_poll_completion(mes,
> +                       &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt),
> +                       offsetof(union MESAPI__RESUME, api_status));
>  }
>
>  static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes)
> --
> 2.35.1
>


More information about the amd-gfx mailing list