[PATCH 1/2] drm/amdkfd: fix trap handling work around for debugging

Felix Kuehling felix.kuehling at amd.com
Wed Jul 19 14:19:13 UTC 2023


On 2023-07-14 at 05:37, Jonathan Kim wrote:
> Update the list of devices that require the cwsr trap handling
> workaround for debugging use cases.
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>

This patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_debug.c                | 5 ++---
>   drivers/gpu/drm/amd/amdkfd/kfd_debug.h                | 6 ++++++
>   drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 ++----
>   3 files changed, 10 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 190b03efe5ff..ccfc81f085ce 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -302,8 +302,7 @@ static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
>   	if (!q)
>   		return 0;
>   
> -	if (KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
> -	    KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0))
> +	if (!kfd_dbg_has_cwsr_workaround(q->device))
>   		return 0;
>   
>   	if (enable && q->properties.is_user_cu_masked)
> @@ -349,7 +348,7 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
>   {
>   	uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
>   	uint32_t flags = pdd->process->dbg_flags;
> -	bool sq_trap_en = !!spi_dbg_cntl;
> +	bool sq_trap_en = !!spi_dbg_cntl || !kfd_dbg_has_cwsr_workaround(pdd->dev);
>   
>   	if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
>   		return 0;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index ba616ed17dee..586d7f886712 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -101,6 +101,12 @@ static inline bool kfd_dbg_is_rlc_restore_supported(struct kfd_node *dev)
>   		 KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 1));
>   }
>   
> +static inline bool kfd_dbg_has_cwsr_workaround(struct kfd_node *dev)
> +{
> +	return KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
> +	       KFD_GC_VERSION(dev) <= IP_VERSION(11, 0, 3);
> +}
> +
>   static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
>   {
>   	if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 31cac1fd0d58..761963ad6154 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -226,8 +226,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
>   	queue_input.paging = false;
>   	queue_input.tba_addr = qpd->tba_addr;
>   	queue_input.tma_addr = qpd->tma_addr;
> -	queue_input.trap_en = KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
> -			      KFD_GC_VERSION(q->device) > IP_VERSION(11, 0, 3);
> +	queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
>   	queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled;
>   
>   	queue_type = convert_to_mes_queue_type(q->properties.type);
> @@ -1827,8 +1826,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>   	 */
>   	q->properties.is_evicted = !!qpd->evicted;
>   	q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled &&
> -			KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0) &&
> -			KFD_GC_VERSION(q->device) <= IP_VERSION(11, 0, 3);
> +				  kfd_dbg_has_cwsr_workaround(q->device);
>   
>   	if (qd)
>   		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,


More information about the amd-gfx mailing list