[PATCH] drm/amdkfd: fix and enable ttmp setup for gfx11

Eric Huang jinhuieric.huang at amd.com
Tue Jul 25 19:23:47 UTC 2023


On 2023-07-24 15:01, Jonathan Kim wrote:
> The MES cached process context must be cleared on adding any queue for
> the first time.
>
> For proper debug support, the MES will clear its cached process context
> on the first call to SET_SHADER_DEBUGGER.
>
> This allows TTMPs to be persistently enabled in a safe manner.
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
Reviewed-by: Eric Huang <jinhuieric.huang at amd.com>

Regards,
Eric
> ---
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c    |  2 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 13 ++++++++++++-
>   drivers/gpu/drm/amd/amdkfd/kfd_debug.c        | 19 +++++++++----------
>   drivers/gpu/drm/amd/amdkfd/kfd_debug.h        | 11 ++++++++++-
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  2 ++
>   drivers/gpu/drm/amd/amdkfd/kfd_topology.c     | 12 +++++-------
>   6 files changed, 39 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> index 77ca5cbfb601..d67d003bada2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> @@ -637,7 +637,7 @@ static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev,
>   {
>   	uint32_t data = 0;
>   
> -	data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
> +	data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
>   	data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
>   	data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index e0f9cf6dd8fd..42df972357e9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2755,6 +2755,16 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
>   
>   		if (pdd->qpd.queue_count)
>   			return -EEXIST;
> +
> +		/*
> +		 * Setup TTMPs by default.
> +		 * Note that this call must remain here for MES ADD QUEUE to
> +		 * skip_process_ctx_clear unconditionally as the first call to
> +		 * SET_SHADER_DEBUGGER clears any stale process context data
> +		 * saved in MES.
> +		 */
> +		if (pdd->dev->kfd->shared_resources.enable_mes)
> +			kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
>   	}
>   
>   	p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
> @@ -2848,7 +2858,8 @@ static int runtime_disable(struct kfd_process *p)
>   			if (!pdd->dev->kfd->shared_resources.enable_mes)
>   				debug_refresh_runlist(pdd->dev->dqm);
>   			else
> -				kfd_dbg_set_mes_debug_mode(pdd);
> +				kfd_dbg_set_mes_debug_mode(pdd,
> +							   !kfd_dbg_has_cwsr_workaround(pdd->dev));
>   		}
>   	}
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 1f82caea59ba..9ec750666382 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -344,11 +344,10 @@ static int kfd_dbg_set_workaround(struct kfd_process *target, bool enable)
>   	return r;
>   }
>   
> -int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
> +int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
>   {
>   	uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
>   	uint32_t flags = pdd->process->dbg_flags;
> -	bool sq_trap_en = !!spi_dbg_cntl || !kfd_dbg_has_cwsr_workaround(pdd->dev);
>   
>   	if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
>   		return 0;
> @@ -432,7 +431,7 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
>   	if (!pdd->dev->kfd->shared_resources.enable_mes)
>   		r = debug_map_and_unlock(pdd->dev->dqm);
>   	else
> -		r = kfd_dbg_set_mes_debug_mode(pdd);
> +		r = kfd_dbg_set_mes_debug_mode(pdd, true);
>   
>   	kfd_dbg_clear_dev_watch_id(pdd, watch_id);
>   
> @@ -474,7 +473,7 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
>   	if (!pdd->dev->kfd->shared_resources.enable_mes)
>   		r = debug_map_and_unlock(pdd->dev->dqm);
>   	else
> -		r = kfd_dbg_set_mes_debug_mode(pdd);
> +		r = kfd_dbg_set_mes_debug_mode(pdd, true);
>   
>   	/* HWS is broken so no point in HW rollback but release the watchpoint anyways */
>   	if (r)
> @@ -516,7 +515,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
>   		if (!pdd->dev->kfd->shared_resources.enable_mes)
>   			r = debug_refresh_runlist(pdd->dev->dqm);
>   		else
> -			r = kfd_dbg_set_mes_debug_mode(pdd);
> +			r = kfd_dbg_set_mes_debug_mode(pdd, true);
>   
>   		if (r) {
>   			target->dbg_flags = prev_flags;
> @@ -539,7 +538,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
>   			if (!pdd->dev->kfd->shared_resources.enable_mes)
>   				debug_refresh_runlist(pdd->dev->dqm);
>   			else
> -				kfd_dbg_set_mes_debug_mode(pdd);
> +				kfd_dbg_set_mes_debug_mode(pdd, true);
>   		}
>   	}
>   
> @@ -601,7 +600,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
>   		if (!pdd->dev->kfd->shared_resources.enable_mes)
>   			debug_refresh_runlist(pdd->dev->dqm);
>   		else
> -			kfd_dbg_set_mes_debug_mode(pdd);
> +			kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
>   	}
>   
>   	kfd_dbg_set_workaround(target, false);
> @@ -717,7 +716,7 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
>   		if (!pdd->dev->kfd->shared_resources.enable_mes)
>   			r = debug_refresh_runlist(pdd->dev->dqm);
>   		else
> -			r = kfd_dbg_set_mes_debug_mode(pdd);
> +			r = kfd_dbg_set_mes_debug_mode(pdd, true);
>   
>   		if (r) {
>   			target->runtime_info.runtime_state =
> @@ -851,7 +850,7 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
>   		if (!pdd->dev->kfd->shared_resources.enable_mes)
>   			r = debug_refresh_runlist(pdd->dev->dqm);
>   		else
> -			r = kfd_dbg_set_mes_debug_mode(pdd);
> +			r = kfd_dbg_set_mes_debug_mode(pdd, true);
>   
>   		if (r)
>   			break;
> @@ -883,7 +882,7 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
>   		if (!pdd->dev->kfd->shared_resources.enable_mes)
>   			r = debug_refresh_runlist(pdd->dev->dqm);
>   		else
> -			r = kfd_dbg_set_mes_debug_mode(pdd);
> +			r = kfd_dbg_set_mes_debug_mode(pdd, true);
>   
>   		if (r)
>   			break;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index 586d7f886712..fd0ff64d4184 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -126,5 +126,14 @@ static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
>   	return true;
>   }
>   
> -int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd);
> +int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en);
> +
> +static inline bool kfd_dbg_has_ttmps_always_setup(struct kfd_node *dev)
> +{
> +	return (KFD_GC_VERSION(dev) < IP_VERSION(11, 0, 0) &&
> +			KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2)) ||
> +	       (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
> +			KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0) &&
> +			(dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70);
> +}
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 71b7f16c0173..ccaf85fc12c2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -228,6 +228,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
>   	queue_input.tma_addr = qpd->tma_addr;
>   	queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
>   	queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled;
> +	queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled ||
> +					     kfd_dbg_has_ttmps_always_setup(q->device);
>   
>   	queue_type = convert_to_mes_queue_type(q->properties.type);
>   	if (queue_type < 0) {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index eeedc3ddffeb..3b0749390388 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -38,6 +38,7 @@
>   #include "kfd_device_queue_manager.h"
>   #include "kfd_iommu.h"
>   #include "kfd_svm.h"
> +#include "kfd_debug.h"
>   #include "amdgpu_amdkfd.h"
>   #include "amdgpu_ras.h"
>   #include "amdgpu.h"
> @@ -1931,6 +1932,9 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
>   			HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED |
>   			HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
>   
> +	if (kfd_dbg_has_ttmps_always_setup(dev->gpu))
> +		dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
> +
>   	if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
>   		if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3))
>   			dev->node_props.debug_prop |=
> @@ -1941,10 +1945,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
>   				HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
>   				HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
>   
> -		if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 4, 2))
> -			dev->node_props.debug_prop |=
> -				HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
> -
>   		if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2))
>   			dev->node_props.capability |=
>   				HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
> @@ -1952,9 +1952,7 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
>   		dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
>   					HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
>   
> -		if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(11, 0, 0))
> -			dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
> -		else
> +		if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
>   			dev->node_props.capability |=
>   				HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
>   	}



More information about the amd-gfx mailing list