[PATCH 13/33] drm/amdkfd: prepare map process for single process debug devices
Felix Kuehling
felix.kuehling at amd.com
Tue May 30 19:36:49 UTC 2023
Am 2023-05-25 um 13:27 schrieb Jonathan Kim:
> Older HW only supports debugging on a single process because the
> SPI debug mode setting registers are device global.
>
> The HWS has supplied a single pinned VMID (0xf) for MAP_PROCESS
> for debug purposes. To pin the VMID, the KFD will remove the VMID from
> the HWS dynamic VMID allocation via SET_RESOURCES so that a debugged
> process will never migrate away from its pinned VMID.
>
> The KFD is responsible for reserving and releasing this pinned VMID
> accordingly whenever the debugger attaches and detaches respectively.
>
> v2: spot fix ups using new kfd_node references
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> .../drm/amd/amdkfd/kfd_device_queue_manager.c | 93 +++++++++++++++++++
> .../drm/amd/amdkfd/kfd_device_queue_manager.h | 5 +
> .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 9 ++
> .../gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h | 5 +-
> 4 files changed, 111 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index d1f44feb7084..c8519adc89ac 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1524,6 +1524,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
> dqm->gws_queue_count = 0;
> dqm->active_runlist = false;
> INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
> + dqm->trap_debug_vmid = 0;
>
> init_sdma_bitmaps(dqm);
>
> @@ -2500,6 +2501,98 @@ static void kfd_process_hw_exception(struct work_struct *work)
> amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
> }
>
> +int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
> + struct qcm_process_device *qpd)
> +{
> + int r;
> + int updated_vmid_mask;
> +
> + if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> + pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
> + return -EINVAL;
> + }
> +
> + dqm_lock(dqm);
> +
> + if (dqm->trap_debug_vmid != 0) {
> + pr_err("Trap debug id already reserved\n");
> + r = -EBUSY;
> + goto out_unlock;
> + }
> +
> + r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
> + USE_DEFAULT_GRACE_PERIOD, false);
> + if (r)
> + goto out_unlock;
> +
> + updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
> + updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd);
> +
> + dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
> + dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd;
> + r = set_sched_resources(dqm);
> + if (r)
> + goto out_unlock;
> +
> + r = map_queues_cpsch(dqm);
> + if (r)
> + goto out_unlock;
> +
> + pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid);
> +
> +out_unlock:
> + dqm_unlock(dqm);
> + return r;
> +}
> +
> +/*
> + * Releases vmid for the trap debugger
> + */
> +int release_debug_trap_vmid(struct device_queue_manager *dqm,
> + struct qcm_process_device *qpd)
> +{
> + int r;
> + int updated_vmid_mask;
> + uint32_t trap_debug_vmid;
> +
> + if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> + pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
> + return -EINVAL;
> + }
> +
> + dqm_lock(dqm);
> + trap_debug_vmid = dqm->trap_debug_vmid;
> + if (dqm->trap_debug_vmid == 0) {
> + pr_err("Trap debug id is not reserved\n");
> + r = -EINVAL;
> + goto out_unlock;
> + }
> +
> + r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
> + USE_DEFAULT_GRACE_PERIOD, false);
> + if (r)
> + goto out_unlock;
> +
> + updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
> + updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd);
> +
> + dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
> + dqm->trap_debug_vmid = 0;
> + r = set_sched_resources(dqm);
> + if (r)
> + goto out_unlock;
> +
> + r = map_queues_cpsch(dqm);
> + if (r)
> + goto out_unlock;
> +
> + pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid);
> +
> +out_unlock:
> + dqm_unlock(dqm);
> + return r;
> +}
> +
> #if defined(CONFIG_DEBUG_FS)
>
> static void seq_reg_dump(struct seq_file *m,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index d4dd3b4acbf0..bf7aa3f84182 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -250,6 +250,7 @@ struct device_queue_manager {
> struct kfd_mem_obj *fence_mem;
> bool active_runlist;
> int sched_policy;
> + uint32_t trap_debug_vmid;
>
> /* hw exception */
> bool is_hws_hang;
> @@ -285,6 +286,10 @@ unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
> unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
> unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
> unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
> +int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
> + struct qcm_process_device *qpd);
> +int release_debug_trap_vmid(struct device_queue_manager *dqm,
> + struct qcm_process_device *qpd);
>
> static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
> {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> index 1fda6dcf84b1..0fe73dbd28af 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> @@ -34,6 +34,9 @@ static int pm_map_process_v9(struct packet_manager *pm,
> {
> struct pm4_mes_map_process *packet;
> uint64_t vm_page_table_base_addr = qpd->page_table_base;
> + struct kfd_node *kfd = pm->dqm->dev;
> + struct kfd_process_device *pdd =
> + container_of(qpd, struct kfd_process_device, qpd);
>
> packet = (struct pm4_mes_map_process *)buffer;
> memset(buffer, 0, sizeof(struct pm4_mes_map_process));
> @@ -49,6 +52,12 @@ static int pm_map_process_v9(struct packet_manager *pm,
> packet->bitfields14.sdma_enable = 1;
> packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
>
> + if (kfd->dqm->trap_debug_vmid && pdd->process->debug_trap_enabled &&
> + pdd->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED) {
> + packet->bitfields2.debug_vmid = kfd->dqm->trap_debug_vmid;
> + packet->bitfields2.new_debug = 1;
> + }
> +
> packet->sh_mem_config = qpd->sh_mem_config;
> packet->sh_mem_bases = qpd->sh_mem_bases;
> if (qpd->tba_addr) {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
> index 206f1960857f..8b6b2bd5c148 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
> @@ -146,7 +146,10 @@ struct pm4_mes_map_process {
> union {
> struct {
> uint32_t pasid:16;
> - uint32_t reserved1:8;
> + uint32_t reserved1:2;
> + uint32_t debug_vmid:4;
> + uint32_t new_debug:1;
> + uint32_t reserved2:1;
> uint32_t diq_enable:1;
> uint32_t process_quantum:7;
> } bitfields2;
More information about the amd-gfx mailing list