[PATCH] drm/amdgpu: fix vulkan test performance drop and hang on VI
Deucher, Alexander
Alexander.Deucher at amd.com
Mon Jul 3 15:47:38 UTC 2017
> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf
> Of Rex Zhu
> Sent: Monday, July 03, 2017 6:13 AM
> To: amd-gfx at lists.freedesktop.org
> Cc: Zhu, Rex
> Subject: [PATCH] drm/amdgpu: fix vulkan test performance drop and hang
> on VI
>
> caused by not programming dynamic_cu_mask_addr in the KIQ MQD.
>
> v2: create struct vi_mqd_allocation in FB, which will contain:
> 1. PM4 MQD structure.
> 2. Write Pointer Poll Memory.
> 3. Read Pointer Report Memory.
> 4. Dynamic CU Mask.
> 5. Dynamic RB Mask.
>
> Change-Id: I22c840f1bf8d365f7df33a27d6b11e1aea8f2958
> Signed-off-by: Rex Zhu <Rex.Zhu at amd.com>
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 27 ++--
> drivers/gpu/drm/amd/include/vi_structs.h | 268
> +++++++++++++++++++++++++++++++
> 2 files changed, 285 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 1a75ab1..452cc5b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -40,7 +40,6 @@
>
> #include "bif/bif_5_0_d.h"
> #include "bif/bif_5_0_sh_mask.h"
> -
> #include "gca/gfx_8_0_d.h"
> #include "gca/gfx_8_0_enum.h"
> #include "gca/gfx_8_0_sh_mask.h"
> @@ -2100,7 +2099,7 @@ static int gfx_v8_0_sw_init(void *handle)
> return r;
>
> /* create MQD for all compute queues as well as KIQ for SRIOV case
> */
> - r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct
> vi_mqd));
> + r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct
> vi_mqd_allocation));
> if (r)
> return r;
>
> @@ -4715,9 +4714,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring
> *ring)
> uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
> uint32_t tmp;
>
> - /* init the mqd struct */
> - memset(mqd, 0, sizeof(struct vi_mqd));
> -
> mqd->header = 0xC0310800;
> mqd->compute_pipelinestat_enable = 0x00000001;
> mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
> @@ -4725,7 +4721,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring
> *ring)
> mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
> mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
> mqd->compute_misc_reserved = 0x00000003;
> -
> + if (!(adev->flags & AMD_IS_APU)) {
> + mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring-
> >mqd_gpu_addr
> + + offsetof(struct
> vi_mqd_allocation, dyamic_cu_mask));
> + mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring-
> >mqd_gpu_addr
> + + offsetof(struct
> vi_mqd_allocation, dyamic_cu_mask));
> + }
> eop_base_addr = ring->eop_gpu_addr >> 8;
> mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
> mqd->cp_hqd_eop_base_addr_hi =
> upper_32_bits(eop_base_addr);
> @@ -4900,7 +4901,7 @@ static int gfx_v8_0_kiq_init_queue(struct
> amdgpu_ring *ring)
> if (adev->gfx.in_reset) { /* for GPU_RESET case */
> /* reset MQD to a clean status */
> if (adev->gfx.mec.mqd_backup[mqd_idx])
> - memcpy(mqd, adev-
> >gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
> + memcpy(mqd, adev-
> >gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
>
> /* reset ring buffer */
> ring->wptr = 0;
> @@ -4916,6 +4917,9 @@ static int gfx_v8_0_kiq_init_queue(struct
> amdgpu_ring *ring)
> vi_srbm_select(adev, 0, 0, 0, 0);
> mutex_unlock(&adev->srbm_mutex);
> } else {
> + memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
> + ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask =
> 0xFFFFFFFF;
> + ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask =
> 0xFFFFFFFF;
> mutex_lock(&adev->srbm_mutex);
> vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> gfx_v8_0_mqd_init(ring);
> @@ -4929,7 +4933,7 @@ static int gfx_v8_0_kiq_init_queue(struct
> amdgpu_ring *ring)
> mutex_unlock(&adev->srbm_mutex);
>
> if (adev->gfx.mec.mqd_backup[mqd_idx])
> - memcpy(adev->gfx.mec.mqd_backup[mqd_idx],
> mqd, sizeof(*mqd));
> + memcpy(adev->gfx.mec.mqd_backup[mqd_idx],
> mqd, sizeof(struct vi_mqd_allocation));
> }
>
> return r;
> @@ -4947,6 +4951,9 @@ static int gfx_v8_0_kcq_init_queue(struct
> amdgpu_ring *ring)
> int mqd_idx = ring - &adev->gfx.compute_ring[0];
>
> if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
> + memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
> + ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask =
> 0xFFFFFFFF;
> + ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask =
> 0xFFFFFFFF;
> mutex_lock(&adev->srbm_mutex);
> vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> gfx_v8_0_mqd_init(ring);
> @@ -4954,11 +4961,11 @@ static int gfx_v8_0_kcq_init_queue(struct
> amdgpu_ring *ring)
> mutex_unlock(&adev->srbm_mutex);
>
> if (adev->gfx.mec.mqd_backup[mqd_idx])
> - memcpy(adev->gfx.mec.mqd_backup[mqd_idx],
> mqd, sizeof(*mqd));
> + memcpy(adev->gfx.mec.mqd_backup[mqd_idx],
> mqd, sizeof(struct vi_mqd_allocation));
> } else if (adev->gfx.in_reset) { /* for GPU_RESET case */
> /* reset MQD to a clean status */
> if (adev->gfx.mec.mqd_backup[mqd_idx])
> - memcpy(mqd, adev-
> >gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
> + memcpy(mqd, adev-
> >gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
> /* reset ring buffer */
> ring->wptr = 0;
> amdgpu_ring_clear_ring(ring);
> diff --git a/drivers/gpu/drm/amd/include/vi_structs.h
> b/drivers/gpu/drm/amd/include/vi_structs.h
> index b68f8ef..ca93b51 100644
> --- a/drivers/gpu/drm/amd/include/vi_structs.h
> +++ b/drivers/gpu/drm/amd/include/vi_structs.h
> @@ -195,6 +195,274 @@ struct vi_mqd {
> uint32_t compute_wave_restore_addr_lo;
> uint32_t compute_wave_restore_addr_hi;
> uint32_t compute_wave_restore_control;
> + uint32_t reserved9;
> + uint32_t reserved10;
> + uint32_t reserved11;
> + uint32_t reserved12;
> + uint32_t reserved13;
> + uint32_t reserved14;
> + uint32_t reserved15;
> + uint32_t reserved16;
> + uint32_t reserved17;
> + uint32_t reserved18;
> + uint32_t reserved19;
> + uint32_t reserved20;
> + uint32_t reserved21;
> + uint32_t reserved22;
> + uint32_t reserved23;
> + uint32_t reserved24;
> + uint32_t reserved25;
> + uint32_t reserved26;
> + uint32_t reserved27;
> + uint32_t reserved28;
> + uint32_t reserved29;
> + uint32_t reserved30;
> + uint32_t reserved31;
> + uint32_t reserved32;
> + uint32_t reserved33;
> + uint32_t reserved34;
> + uint32_t compute_user_data_0;
> + uint32_t compute_user_data_1;
> + uint32_t compute_user_data_2;
> + uint32_t compute_user_data_3;
> + uint32_t compute_user_data_4;
> + uint32_t compute_user_data_5;
> + uint32_t compute_user_data_6;
> + uint32_t compute_user_data_7;
> + uint32_t compute_user_data_8;
> + uint32_t compute_user_data_9;
> + uint32_t compute_user_data_10;
> + uint32_t compute_user_data_11;
> + uint32_t compute_user_data_12;
> + uint32_t compute_user_data_13;
> + uint32_t compute_user_data_14;
> + uint32_t compute_user_data_15;
> + uint32_t cp_compute_csinvoc_count_lo;
> + uint32_t cp_compute_csinvoc_count_hi;
> + uint32_t reserved35;
> + uint32_t reserved36;
> + uint32_t reserved37;
> + uint32_t cp_mqd_query_time_lo;
> + uint32_t cp_mqd_query_time_hi;
> + uint32_t cp_mqd_connect_start_time_lo;
> + uint32_t cp_mqd_connect_start_time_hi;
> + uint32_t cp_mqd_connect_end_time_lo;
> + uint32_t cp_mqd_connect_end_time_hi;
> + uint32_t cp_mqd_connect_end_wf_count;
> + uint32_t cp_mqd_connect_end_pq_rptr;
> + uint32_t cp_mqd_connect_endvi_sdma_mqd_pq_wptr;
> + uint32_t cp_mqd_connect_end_ib_rptr;
> + uint32_t reserved38;
> + uint32_t reserved39;
> + uint32_t cp_mqd_save_start_time_lo;
> + uint32_t cp_mqd_save_start_time_hi;
> + uint32_t cp_mqd_save_end_time_lo;
> + uint32_t cp_mqd_save_end_time_hi;
> + uint32_t cp_mqd_restore_start_time_lo;
> + uint32_t cp_mqd_restore_start_time_hi;
> + uint32_t cp_mqd_restore_end_time_lo;
> + uint32_t cp_mqd_restore_end_time_hi;
> + uint32_t disable_queue;
> + uint32_t reserved41;
> + uint32_t gds_cs_ctxsw_cnt0;
> + uint32_t gds_cs_ctxsw_cnt1;
> + uint32_t gds_cs_ctxsw_cnt2;
> + uint32_t gds_cs_ctxsw_cnt3;
> + uint32_t reserved42;
> + uint32_t reserved43;
> + uint32_t cp_pq_exe_status_lo;
> + uint32_t cp_pq_exe_status_hi;
> + uint32_t cp_packet_id_lo;
> + uint32_t cp_packet_id_hi;
> + uint32_t cp_packet_exe_status_lo;
> + uint32_t cp_packet_exe_status_hi;
> + uint32_t gds_save_base_addr_lo;
> + uint32_t gds_save_base_addr_hi;
> + uint32_t gds_save_mask_lo;
> + uint32_t gds_save_mask_hi;
> + uint32_t ctx_save_base_addr_lo;
> + uint32_t ctx_save_base_addr_hi;
> + uint32_t dynamic_cu_mask_addr_lo;
> + uint32_t dynamic_cu_mask_addr_hi;
> + uint32_t cp_mqd_base_addr_lo;
> + uint32_t cp_mqd_base_addr_hi;
> + uint32_t cp_hqd_active;
> + uint32_t cp_hqd_vmid;
> + uint32_t cp_hqd_persistent_state;
> + uint32_t cp_hqd_pipe_priority;
> + uint32_t cp_hqd_queue_priority;
> + uint32_t cp_hqd_quantum;
> + uint32_t cp_hqd_pq_base_lo;
> + uint32_t cp_hqd_pq_base_hi;
> + uint32_t cp_hqd_pq_rptr;
> + uint32_t cp_hqd_pq_rptr_report_addr_lo;
> + uint32_t cp_hqd_pq_rptr_report_addr_hi;
> + uint32_t cp_hqd_pq_wptr_poll_addr_lo;
> + uint32_t cp_hqd_pq_wptr_poll_addr_hi;
> + uint32_t cp_hqd_pq_doorbell_control;
> + uint32_t cp_hqd_pq_wptr;
> + uint32_t cp_hqd_pq_control;
> + uint32_t cp_hqd_ib_base_addr_lo;
> + uint32_t cp_hqd_ib_base_addr_hi;
> + uint32_t cp_hqd_ib_rptr;
> + uint32_t cp_hqd_ib_control;
> + uint32_t cp_hqd_iq_timer;
> + uint32_t cp_hqd_iq_rptr;
> + uint32_t cp_hqd_dequeue_request;
> + uint32_t cp_hqd_dma_offload;
> + uint32_t cp_hqd_sema_cmd;
> + uint32_t cp_hqd_msg_type;
> + uint32_t cp_hqd_atomic0_preop_lo;
> + uint32_t cp_hqd_atomic0_preop_hi;
> + uint32_t cp_hqd_atomic1_preop_lo;
> + uint32_t cp_hqd_atomic1_preop_hi;
> + uint32_t cp_hqd_hq_status0;
> + uint32_t cp_hqd_hq_control0;
> + uint32_t cp_mqd_control;
> + uint32_t cp_hqd_hq_status1;
> + uint32_t cp_hqd_hq_control1;
> + uint32_t cp_hqd_eop_base_addr_lo;
> + uint32_t cp_hqd_eop_base_addr_hi;
> + uint32_t cp_hqd_eop_control;
> + uint32_t cp_hqd_eop_rptr;
> + uint32_t cp_hqd_eop_wptr;
> + uint32_t cp_hqd_eop_done_events;
> + uint32_t cp_hqd_ctx_save_base_addr_lo;
> + uint32_t cp_hqd_ctx_save_base_addr_hi;
> + uint32_t cp_hqd_ctx_save_control;
> + uint32_t cp_hqd_cntl_stack_offset;
> + uint32_t cp_hqd_cntl_stack_size;
> + uint32_t cp_hqd_wg_state_offset;
> + uint32_t cp_hqd_ctx_save_size;
> + uint32_t cp_hqd_gds_resource_state;
> + uint32_t cp_hqd_error;
> + uint32_t cp_hqd_eop_wptr_mem;
> + uint32_t cp_hqd_eop_dones;
> + uint32_t reserved46;
> + uint32_t reserved47;
> + uint32_t reserved48;
> + uint32_t reserved49;
> + uint32_t reserved50;
> + uint32_t reserved51;
> + uint32_t reserved52;
> + uint32_t reserved53;
> + uint32_t reserved54;
> + uint32_t reserved55;
> + uint32_t iqtimer_pkt_header;
> + uint32_t iqtimer_pkt_dw0;
> + uint32_t iqtimer_pkt_dw1;
> + uint32_t iqtimer_pkt_dw2;
> + uint32_t iqtimer_pkt_dw3;
> + uint32_t iqtimer_pkt_dw4;
> + uint32_t iqtimer_pkt_dw5;
> + uint32_t iqtimer_pkt_dw6;
> + uint32_t iqtimer_pkt_dw7;
> + uint32_t iqtimer_pkt_dw8;
> + uint32_t iqtimer_pkt_dw9;
> + uint32_t iqtimer_pkt_dw10;
> + uint32_t iqtimer_pkt_dw11;
> + uint32_t iqtimer_pkt_dw12;
> + uint32_t iqtimer_pkt_dw13;
> + uint32_t iqtimer_pkt_dw14;
> + uint32_t iqtimer_pkt_dw15;
> + uint32_t iqtimer_pkt_dw16;
> + uint32_t iqtimer_pkt_dw17;
> + uint32_t iqtimer_pkt_dw18;
> + uint32_t iqtimer_pkt_dw19;
> + uint32_t iqtimer_pkt_dw20;
> + uint32_t iqtimer_pkt_dw21;
> + uint32_t iqtimer_pkt_dw22;
> + uint32_t iqtimer_pkt_dw23;
> + uint32_t iqtimer_pkt_dw24;
> + uint32_t iqtimer_pkt_dw25;
> + uint32_t iqtimer_pkt_dw26;
> + uint32_t iqtimer_pkt_dw27;
> + uint32_t iqtimer_pkt_dw28;
> + uint32_t iqtimer_pkt_dw29;
> + uint32_t iqtimer_pkt_dw30;
> + uint32_t iqtimer_pkt_dw31;
> + uint32_t reserved56;
> + uint32_t reserved57;
> + uint32_t reserved58;
> + uint32_t set_resources_header;
> + uint32_t set_resources_dw1;
> + uint32_t set_resources_dw2;
> + uint32_t set_resources_dw3;
> + uint32_t set_resources_dw4;
> + uint32_t set_resources_dw5;
> + uint32_t set_resources_dw6;
> + uint32_t set_resources_dw7;
> + uint32_t reserved59;
> + uint32_t reserved60;
> + uint32_t reserved61;
> + uint32_t reserved62;
> + uint32_t reserved63;
> + uint32_t reserved64;
> + uint32_t reserved65;
> + uint32_t reserved66;
> + uint32_t reserved67;
> + uint32_t reserved68;
> + uint32_t reserved69;
> + uint32_t reserved70;
> + uint32_t reserved71;
> + uint32_t reserved72;
> + uint32_t reserved73;
> + uint32_t reserved74;
> + uint32_t reserved75;
> + uint32_t reserved76;
> + uint32_t reserved77;
> + uint32_t reserved78;
> + uint32_t reserved_t[256];
> +};
> +
> +struct vi_mqd_allocation {
> + struct vi_mqd mqd;
> + uint32_t wptr_poll_mem;
> + uint32_t rptr_report_mem;
> + uint32_t dyamic_cu_mask;
> + uint32_t dyamic_rb_mask;
> +};
> +
> +struct cz_mqd {
> + uint32_t header;
> + uint32_t compute_dispatch_initiator;
> + uint32_t compute_dim_x;
> + uint32_t compute_dim_y;
> + uint32_t compute_dim_z;
> + uint32_t compute_start_x;
> + uint32_t compute_start_y;
> + uint32_t compute_start_z;
> + uint32_t compute_num_thread_x;
> + uint32_t compute_num_thread_y;
> + uint32_t compute_num_thread_z;
> + uint32_t compute_pipelinestat_enable;
> + uint32_t compute_perfcount_enable;
> + uint32_t compute_pgm_lo;
> + uint32_t compute_pgm_hi;
> + uint32_t compute_tba_lo;
> + uint32_t compute_tba_hi;
> + uint32_t compute_tma_lo;
> + uint32_t compute_tma_hi;
> + uint32_t compute_pgm_rsrc1;
> + uint32_t compute_pgm_rsrc2;
> + uint32_t compute_vmid;
> + uint32_t compute_resource_limits;
> + uint32_t compute_static_thread_mgmt_se0;
> + uint32_t compute_static_thread_mgmt_se1;
> + uint32_t compute_tmpring_size;
> + uint32_t compute_static_thread_mgmt_se2;
> + uint32_t compute_static_thread_mgmt_se3;
> + uint32_t compute_restart_x;
> + uint32_t compute_restart_y;
> + uint32_t compute_restart_z;
> + uint32_t compute_thread_trace_enable;
> + uint32_t compute_misc_reserved;
> + uint32_t compute_dispatch_id;
> + uint32_t compute_threadgroup_id;
> + uint32_t compute_relaunch;
> + uint32_t compute_wave_restore_addr_lo;
> + uint32_t compute_wave_restore_addr_hi;
> + uint32_t compute_wave_restore_control;
> uint32_t reserved_39;
> uint32_t reserved_40;
> uint32_t reserved_41;
> --
> 1.9.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx mailing list