[PATCH] drm/amdkfd: fix and enable ttmp setup for gfx11
Eric Huang
jinhuieric.huang at amd.com
Tue Jul 25 19:23:47 UTC 2023
On 2023-07-24 15:01, Jonathan Kim wrote:
> The MES cached process context must be cleared on adding any queue for
> the first time.
>
> For proper debug support, the MES will clear its cached process context
> on the first call to SET_SHADER_DEBUGGER.
>
> This allows TTMPs to be persistently enabled in a safe manner.
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
Reviewed-by: Eric Huang <jinhuieric.huang at amd.com>
Regards,
Eric
> ---
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c | 2 +-
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 13 ++++++++++++-
> drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 19 +++++++++----------
> drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 11 ++++++++++-
> .../drm/amd/amdkfd/kfd_device_queue_manager.c | 2 ++
> drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 12 +++++-------
> 6 files changed, 39 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> index 77ca5cbfb601..d67d003bada2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> @@ -637,7 +637,7 @@ static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev,
> {
> uint32_t data = 0;
>
> - data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
> + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
> data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
> data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index e0f9cf6dd8fd..42df972357e9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2755,6 +2755,16 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
>
> if (pdd->qpd.queue_count)
> return -EEXIST;
> +
> + /*
> + * Setup TTMPs by default.
> + * Note that this call must remain here for MES ADD QUEUE to
> + * skip_process_ctx_clear unconditionally as the first call to
> + * SET_SHADER_DEBUGGER clears any stale process context data
> + * saved in MES.
> + */
> + if (pdd->dev->kfd->shared_resources.enable_mes)
> + kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
> }
>
> p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
> @@ -2848,7 +2858,8 @@ static int runtime_disable(struct kfd_process *p)
> if (!pdd->dev->kfd->shared_resources.enable_mes)
> debug_refresh_runlist(pdd->dev->dqm);
> else
> - kfd_dbg_set_mes_debug_mode(pdd);
> + kfd_dbg_set_mes_debug_mode(pdd,
> + !kfd_dbg_has_cwsr_workaround(pdd->dev));
> }
> }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 1f82caea59ba..9ec750666382 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -344,11 +344,10 @@ static int kfd_dbg_set_workaround(struct kfd_process *target, bool enable)
> return r;
> }
>
> -int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
> +int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
> {
> uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
> uint32_t flags = pdd->process->dbg_flags;
> - bool sq_trap_en = !!spi_dbg_cntl || !kfd_dbg_has_cwsr_workaround(pdd->dev);
>
> if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
> return 0;
> @@ -432,7 +431,7 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
> if (!pdd->dev->kfd->shared_resources.enable_mes)
> r = debug_map_and_unlock(pdd->dev->dqm);
> else
> - r = kfd_dbg_set_mes_debug_mode(pdd);
> + r = kfd_dbg_set_mes_debug_mode(pdd, true);
>
> kfd_dbg_clear_dev_watch_id(pdd, watch_id);
>
> @@ -474,7 +473,7 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
> if (!pdd->dev->kfd->shared_resources.enable_mes)
> r = debug_map_and_unlock(pdd->dev->dqm);
> else
> - r = kfd_dbg_set_mes_debug_mode(pdd);
> + r = kfd_dbg_set_mes_debug_mode(pdd, true);
>
> /* HWS is broken so no point in HW rollback but release the watchpoint anyways */
> if (r)
> @@ -516,7 +515,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
> if (!pdd->dev->kfd->shared_resources.enable_mes)
> r = debug_refresh_runlist(pdd->dev->dqm);
> else
> - r = kfd_dbg_set_mes_debug_mode(pdd);
> + r = kfd_dbg_set_mes_debug_mode(pdd, true);
>
> if (r) {
> target->dbg_flags = prev_flags;
> @@ -539,7 +538,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
> if (!pdd->dev->kfd->shared_resources.enable_mes)
> debug_refresh_runlist(pdd->dev->dqm);
> else
> - kfd_dbg_set_mes_debug_mode(pdd);
> + kfd_dbg_set_mes_debug_mode(pdd, true);
> }
> }
>
> @@ -601,7 +600,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
> if (!pdd->dev->kfd->shared_resources.enable_mes)
> debug_refresh_runlist(pdd->dev->dqm);
> else
> - kfd_dbg_set_mes_debug_mode(pdd);
> + kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
> }
>
> kfd_dbg_set_workaround(target, false);
> @@ -717,7 +716,7 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
> if (!pdd->dev->kfd->shared_resources.enable_mes)
> r = debug_refresh_runlist(pdd->dev->dqm);
> else
> - r = kfd_dbg_set_mes_debug_mode(pdd);
> + r = kfd_dbg_set_mes_debug_mode(pdd, true);
>
> if (r) {
> target->runtime_info.runtime_state =
> @@ -851,7 +850,7 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
> if (!pdd->dev->kfd->shared_resources.enable_mes)
> r = debug_refresh_runlist(pdd->dev->dqm);
> else
> - r = kfd_dbg_set_mes_debug_mode(pdd);
> + r = kfd_dbg_set_mes_debug_mode(pdd, true);
>
> if (r)
> break;
> @@ -883,7 +882,7 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
> if (!pdd->dev->kfd->shared_resources.enable_mes)
> r = debug_refresh_runlist(pdd->dev->dqm);
> else
> - r = kfd_dbg_set_mes_debug_mode(pdd);
> + r = kfd_dbg_set_mes_debug_mode(pdd, true);
>
> if (r)
> break;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index 586d7f886712..fd0ff64d4184 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -126,5 +126,14 @@ static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
> return true;
> }
>
> -int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd);
> +int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en);
> +
> +static inline bool kfd_dbg_has_ttmps_always_setup(struct kfd_node *dev)
> +{
> + return (KFD_GC_VERSION(dev) < IP_VERSION(11, 0, 0) &&
> + KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2)) ||
> + (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
> + KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0) &&
> + (dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70);
> +}
> #endif
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 71b7f16c0173..ccaf85fc12c2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -228,6 +228,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
> queue_input.tma_addr = qpd->tma_addr;
> queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
> queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled;
> + queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled ||
> + kfd_dbg_has_ttmps_always_setup(q->device);
>
> queue_type = convert_to_mes_queue_type(q->properties.type);
> if (queue_type < 0) {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index eeedc3ddffeb..3b0749390388 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -38,6 +38,7 @@
> #include "kfd_device_queue_manager.h"
> #include "kfd_iommu.h"
> #include "kfd_svm.h"
> +#include "kfd_debug.h"
> #include "amdgpu_amdkfd.h"
> #include "amdgpu_ras.h"
> #include "amdgpu.h"
> @@ -1931,6 +1932,9 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
> HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED |
> HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
>
> + if (kfd_dbg_has_ttmps_always_setup(dev->gpu))
> + dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
> +
> if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
> if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3))
> dev->node_props.debug_prop |=
> @@ -1941,10 +1945,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
> HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
> HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
>
> - if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 4, 2))
> - dev->node_props.debug_prop |=
> - HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
> -
> if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2))
> dev->node_props.capability |=
> HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
> @@ -1952,9 +1952,7 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
> dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
> HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
>
> - if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(11, 0, 0))
> - dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
> - else
> + if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
> dev->node_props.capability |=
> HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
> }
More information about the amd-gfx
mailing list