[PATCH 1/2] drm/amdkfd: fix trap handling work around for debugging

Kim, Jonathan Jonathan.Kim at amd.com
Tue Jul 18 22:13:23 UTC 2023


[Public]

+ Ruili Ji as this is a follow-up to

commit 52223c7e74d124bea47beec467e59fdfc77559fc
Author: Ruili Ji <ruiliji2 at amd.com>
Date:   Tue Jun 6 14:06:01 2023 +0800

    drm/amdkfd: To enable traps for GC_11_0_4 and up

    Flag trap_en should be enabled for trap handler.

    Signed-off-by: Ruili Ji <ruiliji2 at amd.com>
    Signed-off-by: Aaron Liu <aaron.liu at amd.com>
    Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

This is to ensure the debugger is consistent with the other checks.

Thanks,

Jon

> -----Original Message-----
> From: Kim, Jonathan <Jonathan.Kim at amd.com>
> Sent: Friday, July 14, 2023 5:38 AM
> To: amd-gfx at lists.freedesktop.org
> Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Kim, Jonathan
> <Jonathan.Kim at amd.com>
> Subject: [PATCH 1/2] drm/amdkfd: fix trap handling work around for
> debugging
>
> Update the list of devices that require the cwsr trap handling
> workaround for debugging use cases.
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c                | 5 ++---
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.h                | 6 ++++++
>  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 ++----
>  3 files changed, 10 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 190b03efe5ff..ccfc81f085ce 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -302,8 +302,7 @@ static int kfd_dbg_set_queue_workaround(struct
> queue *q, bool enable)
>       if (!q)
>               return 0;
>
> -     if (KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
> -         KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0))
> +     if (!kfd_dbg_has_cwsr_workaround(q->device))
>               return 0;
>
>       if (enable && q->properties.is_user_cu_masked)
> @@ -349,7 +348,7 @@ int kfd_dbg_set_mes_debug_mode(struct
> kfd_process_device *pdd)
>  {
>       uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd-
> >spi_dbg_launch_mode;
>       uint32_t flags = pdd->process->dbg_flags;
> -     bool sq_trap_en = !!spi_dbg_cntl;
> +     bool sq_trap_en = !!spi_dbg_cntl ||
> !kfd_dbg_has_cwsr_workaround(pdd->dev);
>
>       if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
>               return 0;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index ba616ed17dee..586d7f886712 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -101,6 +101,12 @@ static inline bool
> kfd_dbg_is_rlc_restore_supported(struct kfd_node *dev)
>                KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 1));
>  }
>
> +static inline bool kfd_dbg_has_cwsr_workaround(struct kfd_node *dev)
> +{
> +     return KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
> +            KFD_GC_VERSION(dev) <= IP_VERSION(11, 0, 3);
> +}
> +
>  static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
>  {
>       if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 31cac1fd0d58..761963ad6154 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -226,8 +226,7 @@ static int add_queue_mes(struct
> device_queue_manager *dqm, struct queue *q,
>       queue_input.paging = false;
>       queue_input.tba_addr = qpd->tba_addr;
>       queue_input.tma_addr = qpd->tma_addr;
> -     queue_input.trap_en = KFD_GC_VERSION(q->device) <
> IP_VERSION(11, 0, 0) ||
> -                           KFD_GC_VERSION(q->device) > IP_VERSION(11, 0,
> 3);
> +     queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
>       queue_input.skip_process_ctx_clear = qpd->pqm->process-
> >debug_trap_enabled;
>
>       queue_type = convert_to_mes_queue_type(q->properties.type);
> @@ -1827,8 +1826,7 @@ static int create_queue_cpsch(struct
> device_queue_manager *dqm, struct queue *q,
>        */
>       q->properties.is_evicted = !!qpd->evicted;
>       q->properties.is_dbg_wa = qpd->pqm->process-
> >debug_trap_enabled &&
> -                     KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0)
> &&
> -                     KFD_GC_VERSION(q->device) <= IP_VERSION(11, 0, 3);
> +                               kfd_dbg_has_cwsr_workaround(q->device);
>
>       if (qd)
>               mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q-
> >mqd_mem_obj, &q->gart_mqd_addr,
> --
> 2.25.1



More information about the amd-gfx mailing list