[v6 04/13] drm/amdgpu: Implement active VMID detection in MES11 queue reset for GFX

Zhang, Jesse(Jie) Jesse.Zhang at amd.com
Tue Aug 5 06:52:16 UTC 2025


[AMD Official Use Only - AMD Internal Distribution Only]

-----Original Message-----
From: Alex Deucher <alexdeucher at gmail.com>
Sent: Tuesday, August 5, 2025 1:03 AM
To: Zhang, Jesse(Jie) <Jesse.Zhang at amd.com>
Cc: amd-gfx at lists.freedesktop.org; Deucher, Alexander <Alexander.Deucher at amd.com>; Koenig, Christian <Christian.Koenig at amd.com>; Chau, Kyle-hai <Kyle-hai.Chau at amd.com>
Subject: Re: [v6 04/13] drm/amdgpu: Implement active VMID detection in MES11 queue reset for GFX

On Mon, Aug 4, 2025 at 4:48 AM Jesse.Zhang <Jesse.Zhang at amd.com> wrote:
>
> Extend the MES queue reset functionality for GFX queues. The changes include:
>
> 1. Added detection of active VMIDs by reading CP_CNTX_STAT and CP_VMID
>    registers to properly identify contexts that need resetting
>
> 2. Implemented fallback to HPD status method when no active VMIDs are
>    found, checking both pipe 0 and pipe 1 queues
>
> 3. Extended the MES reset packet with:
>    - active_vmids bitmap
>    - connected_queue_index for pipe 0
>    - connected_queue_index_p1 for pipe 1
>

Do we still need this if we switch over to suspending queues before we run detect and reset?
   With that, we don’t need it. Will drop it.

Thanks
Jesse

Alex

> Suggested-by: kyle-hai.chau <kyle-hai.chau at amd.com>
> Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c        | 51 +++++++++++++++++++
>  drivers/gpu/drm/amd/include/mes_v11_api_def.h | 13 ++++-
>  2 files changed, 63 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index ed6a7f8af544..1422bc59cd40 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -789,6 +789,12 @@ static int mes_v11_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes,
>                                                   struct
> mes_detect_and_reset_queue_input *input)  {
>         union MESAPI__RESET mes_reset_queue_pkt;
> +       struct amdgpu_device *adev = mes->adev;
> +       uint32_t active_vmids = 0;
> +       uint32_t connected_queue_index = 0;
> +       uint32_t queue_status = 0;
> +       uint32_t connected_queue_index_p1 = 0;
> +       uint32_t queue_status_p1 = 0;
>
>         memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
>
> @@ -801,6 +807,51 @@ static int mes_v11_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes,
>         mes_reset_queue_pkt.doorbell_offset_addr =
>                 mes->hung_queue_db_array_gpu_addr;
>
> +       /* Add VMID detection for GFX queues */
> +       if (input->queue_type == AMDGPU_RING_TYPE_GFX) {
> +               uint32_t cp_cntx_stat = RREG32_SOC15(GC, 0, regCP_CNTX_STAT);
> +               uint32_t cp_vmid, grbm_gfx_cntl;
> +
> +               /* Check active contexts in CP_CNTX_STAT */
> +               for (uint32_t i = 0; i < 8; i++) {
> +                       if ((cp_cntx_stat >> (0x14 + i)) & 0x1) {
> +                               grbm_gfx_cntl = (i << 11);
> +                               WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, grbm_gfx_cntl);
> +                               cp_vmid = RREG32_SOC15(GC, 0, regCP_VMID);
> +                               active_vmids |= (1 << cp_vmid);
> +                       }
> +               }
> +
> +               /* Fallback to HPD status if no active VMIDs found */
> +               if (active_vmids == 0) {
> +                       uint32_t hpd_status;
> +
> +                       /* Pipe 0 */
> +                       WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, 0);
> +                       hpd_status = RREG32_SOC15(GC, 0, regCP_GFX_HPD_STATUS0);
> +                       queue_status = hpd_status & 0x1F;
> +                       connected_queue_index = (hpd_status & 0xE0) >>
> + 5;
> +
> +                       /* Pipe 1 */
> +                       WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, (1 << 6));
> +                       hpd_status = RREG32_SOC15(GC, 0, regCP_GFX_HPD_STATUS0);
> +                       queue_status_p1 = hpd_status & 0x1F;
> +                       connected_queue_index_p1 = (hpd_status & 0xE0) >> 5;
> +               }
> +
> +               mes_reset_queue_pkt.active_vmids = active_vmids;
> +               if (active_vmids == 0) {
> +                       if (queue_status != 0) {
> +                               mes_reset_queue_pkt.use_connected_queue_index = 1;
> +                               mes_reset_queue_pkt.connected_queue_index = connected_queue_index;
> +                       }
> +                       if (queue_status_p1 != 0) {
> +                               mes_reset_queue_pkt.use_connected_queue_index_p1 = 1;
> +                               mes_reset_queue_pkt.connected_queue_index_p1 = connected_queue_index_p1;
> +                       }
> +               }
> +       }
> +
>         if (input->detect_only)
>                 mes_reset_queue_pkt.hang_detect_only = 1;
>         else
> diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> index 15680c3f4970..62ad4f0337eb 100644
> --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> @@ -460,7 +460,11 @@ union MESAPI__RESET {
>                         uint32_t                hang_detect_only : 1;
>                         /* Rest HP and LP kernel queues not managed by MES */
>                         uint32_t                reset_legacy_gfx : 1;
> -                       uint32_t                reserved : 28;
> +                       /* Fallback to use connected queue index when CP_CNTX_STAT method fails (gfx pipe 0) */
> +                       uint32_t                use_connected_queue_index : 1;
> +                       /* For gfx pipe 1 */
> +                       uint32_t                use_connected_queue_index_p1 : 1;
> +                       uint32_t                reserved : 26;
>                 };
>
>                 uint64_t                        gang_context_addr;
> @@ -488,6 +492,13 @@ union MESAPI__RESET {
>                 uint64_t                        wptr_addr_hp;
>
>                 struct MES_API_STATUS           api_status;
> +               uint32_t                        active_vmids;
> +               uint64_t                        timestamp;
> +
> +               uint32_t                        gang_context_array_index;
> +
> +               uint32_t                        connected_queue_index;
> +               uint32_t                        connected_queue_index_p1;
>         };
>
>         uint32_t        max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
> --
> 2.49.0
>


More information about the amd-gfx mailing list