[PATCH v2 2/2] drm/amdgpu: Reduce dequeue retry timeout for gfx9 family
Kim, Jonathan
Jonathan.Kim at amd.com
Mon Mar 3 20:53:53 UTC 2025
[Public]
> -----Original Message-----
> From: Kasiviswanathan, Harish <Harish.Kasiviswanathan at amd.com>
> Sent: Monday, March 3, 2025 3:44 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan at amd.com>; Kim, Jonathan
> <Jonathan.Kim at amd.com>
> Subject: [PATCH v2 2/2] drm/amdgpu: Reduce dequeue retry timeout for gfx9 family
>
> Dequeue retry timeout controls the interval between checks for unmet
> conditions. On MI series, reduce this from 0x40 to 0x1 (~ 1 uS). The
> cost of additional bandwidth consumed by CP when polling memory
> shouldn't be substantial.
>
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
> Reviewed-by: : Jonathan Kim <jonathan.kim at amd.com>
> ---
> .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 2 +-
> .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +-
> .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 4 +-
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 28 ++++++-------
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h | 5 ++-
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 2 +-
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 28 ++++++-------
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 5 ++-
> .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 40 +++++++++++++------
> .../gpu/drm/amd/include/kgd_kfd_interface.h | 5 ++-
> 10 files changed, 70 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index 8dfdb18197c4..53f5f1885870 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -189,7 +189,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
> .clear_address_watch = kgd_gfx_v9_clear_address_watch,
> .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> - .build_grace_period_packet_info =
> kgd_gfx_v9_build_grace_period_packet_info,
> + .build_dequeue_wait_counts_packet_info =
> kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
> .program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings,
> .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
> .hqd_reset = kgd_gfx_v9_hqd_reset,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index 9abf29b58ac7..6fd41aece7e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -415,7 +415,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
> .set_address_watch = kgd_gfx_v9_set_address_watch,
> .clear_address_watch = kgd_gfx_v9_clear_address_watch,
> .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> - .build_grace_period_packet_info =
> kgd_gfx_v9_build_grace_period_packet_info,
> + .build_dequeue_wait_counts_packet_info =
> kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
> .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> .program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings,
> .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> index e2ae714a700f..95f249896275 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> @@ -530,8 +530,8 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
> .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> .program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings,
> - .build_grace_period_packet_info =
> - kgd_gfx_v9_build_grace_period_packet_info,
> + .build_dequeue_wait_counts_packet_info =
> + kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
> .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
> .disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 62176d607bef..0b03f2e9a858 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -1021,25 +1021,25 @@ void kgd_gfx_v10_get_iq_wait_times(struct
> amdgpu_device *adev,
> *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0,
> mmCP_IQ_WAIT_TIME2));
> }
>
> -void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
> +void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device
> *adev,
> uint32_t wait_times,
> - uint32_t grace_period,
> + uint32_t sch_wave,
> + uint32_t que_sleep,
> uint32_t *reg_offset,
> uint32_t *reg_data)
> {
> *reg_data = wait_times;
>
> - /*
> - * The CP cannont handle a 0 grace period input and will result in
> - * an infinite grace period being set so set to 1 to prevent this.
> - */
> - if (grace_period == 0)
> - grace_period = 1;
> -
> - *reg_data = REG_SET_FIELD(*reg_data,
> - CP_IQ_WAIT_TIME2,
> - SCH_WAVE,
> - grace_period);
> + if (sch_wave)
> + *reg_data = REG_SET_FIELD(*reg_data,
> + CP_IQ_WAIT_TIME2,
> + SCH_WAVE,
> + sch_wave);
> + if (que_sleep)
> + *reg_data = REG_SET_FIELD(*reg_data,
> + CP_IQ_WAIT_TIME2,
> + QUE_SLEEP,
> + que_sleep);
>
> *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
> }
> @@ -1109,7 +1109,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
> .set_address_watch = kgd_gfx_v10_set_address_watch,
> .clear_address_watch = kgd_gfx_v10_clear_address_watch,
> .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
> - .build_grace_period_packet_info =
> kgd_gfx_v10_build_grace_period_packet_info,
> + .build_dequeue_wait_counts_packet_info =
> kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
> .program_trap_handler_settings = program_trap_handler_settings,
> .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
> .hqd_reset = kgd_gfx_v10_hqd_reset
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> index 9efd2dd4fdd7..89ae07288b10 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> @@ -51,9 +51,10 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct
> amdgpu_device *adev,
> void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
> uint32_t *wait_times,
> uint32_t inst);
> -void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
> +void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device
> *adev,
> uint32_t wait_times,
> - uint32_t grace_period,
> + uint32_t sch_wave,
> + uint32_t que_sleep,
> uint32_t *reg_offset,
> uint32_t *reg_data);
> uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> index c718bedda0ca..2c5f22838fe0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> @@ -673,7 +673,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
> .set_vm_context_page_table_base =
> set_vm_context_page_table_base_v10_3,
> .program_trap_handler_settings = program_trap_handler_settings_v10_3,
> .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
> - .build_grace_period_packet_info =
> kgd_gfx_v10_build_grace_period_packet_info,
> + .build_dequeue_wait_counts_packet_info =
> kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
> .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
> .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
> .validate_trap_override_request =
> kgd_gfx_v10_validate_trap_override_request,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 441568163e20..d2bbe9973c93 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -1077,25 +1077,25 @@ void kgd_gfx_v9_get_cu_occupancy(struct
> amdgpu_device *adev,
> adev->gfx.cu_info.max_waves_per_simd;
> }
>
> -void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
> +void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device
> *adev,
> uint32_t wait_times,
> - uint32_t grace_period,
> + uint32_t sch_wave,
> + uint32_t que_sleep,
> uint32_t *reg_offset,
> uint32_t *reg_data)
> {
> *reg_data = wait_times;
>
> - /*
> - * The CP cannot handle a 0 grace period input and will result in
> - * an infinite grace period being set so set to 1 to prevent this.
> - */
> - if (grace_period == 0)
> - grace_period = 1;
> -
> - *reg_data = REG_SET_FIELD(*reg_data,
> - CP_IQ_WAIT_TIME2,
> - SCH_WAVE,
> - grace_period);
> + if (sch_wave)
> + *reg_data = REG_SET_FIELD(*reg_data,
> + CP_IQ_WAIT_TIME2,
> + SCH_WAVE,
> + sch_wave);
> + if (que_sleep)
> + *reg_data = REG_SET_FIELD(*reg_data,
> + CP_IQ_WAIT_TIME2,
> + QUE_SLEEP,
> + que_sleep);
>
> *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
> }
> @@ -1254,7 +1254,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
> .set_address_watch = kgd_gfx_v9_set_address_watch,
> .clear_address_watch = kgd_gfx_v9_clear_address_watch,
> .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> - .build_grace_period_packet_info =
> kgd_gfx_v9_build_grace_period_packet_info,
> + .build_dequeue_wait_counts_packet_info =
> kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
> .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> .program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings,
> .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> index b6a91a552aa4..54ee8992be5c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> @@ -97,9 +97,10 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct
> amdgpu_device *adev,
> void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
> uint32_t *wait_times,
> uint32_t inst);
> -void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
> +void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device
> *adev,
> uint32_t wait_times,
> - uint32_t grace_period,
> + uint32_t sch_wave,
> + uint32_t que_sleep,
> uint32_t *reg_offset,
> uint32_t *reg_data);
> uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> index b9c611b249e6..6861e8f7a2f7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> @@ -298,13 +298,14 @@ static int pm_map_queues_v9(struct packet_manager
> *pm, uint32_t *buffer,
> }
>
> static inline void pm_build_dequeue_wait_counts_packet_info(struct
> packet_manager *pm,
> - uint32_t sch_value, uint32_t *reg_offset,
> + uint32_t sch_value, uint32_t que_sleep, uint32_t *reg_offset,
> uint32_t *reg_data)
> {
> - pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
> + pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info(
> pm->dqm->dev->adev,
> pm->dqm->wait_times,
> sch_value,
> + que_sleep,
> reg_offset,
> reg_data);
> }
> @@ -320,26 +321,41 @@ static int pm_config_dequeue_wait_counts_v9(struct
> packet_manager *pm,
>
> switch (cmd) {
> case KFD_DEQUEUE_WAIT_INIT:
> - /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
> - if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev-
> >gmc.is_app_apu &&
> - (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
> - pm_build_dequeue_wait_counts_packet_info(pm, 1,
> ®_offset, ®_data);
> - else
> + uint32_t sch_wave = 0, que_sleep = 0;
> + /* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default
> 0x40.
> + * On a 1GHz machine this is roughly 1 microsecond, which is
> + * about how long it takes to load data out of memory during
> + * queue connect
> + * QUE_SLEEP: Wait Count for Dequeue Retry.
> + */
> + if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) &&
> + KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) {
> + que_sleep = 1;
> +
> + /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU
> */
> + if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev-
> >gmc.is_app_apu &&
> + (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4,
> 3)))
> + sch_wave = 1;
> + } else /* non gfx9 */
> return 0;
Please close the else statement braces to balance the if statement braces.
Also the /* non gfx9 */ comment isn't correct and should be removed because you can still have GFX9 devices that are pre v9.4.1 that do not need optimized wait times on dequeue.
Thanks,
Jon
> +
> + pm_build_dequeue_wait_counts_packet_info(pm, sch_wave,
> que_sleep,
> + ®_offset, ®_data);
> +
> break;
> case KFD_DEQUEUE_WAIT_RESET:
> - /* function called only to get reg_offset */
> - pm_build_dequeue_wait_counts_packet_info(pm, 0, ®_offset,
> ®_data);
> - reg_data = pm->dqm->wait_times;
> + /* reg_data would be set to dqm->wait_times */
> + pm_build_dequeue_wait_counts_packet_info(pm, 0, 0, ®_offset,
> ®_data);
> break;
>
> case KFD_DEQUEUE_WAIT_SET_SCH_WAVE:
> /* The CP cannot handle value 0 and it will result in
> - * an infinite grace period being set so set to 1 to prevent this.
> + * an infinite grace period being set so set to 1 to prevent this. Also
> + * avoid debugger API breakage as it sets 0 and expects a low value.
> */
> if (!value)
> value = 1;
> - pm_build_dequeue_wait_counts_packet_info(pm, value, ®_offset,
> ®_data);
> + pm_build_dequeue_wait_counts_packet_info(pm, value, 0,
> ®_offset, ®_data);
> break;
> default:
> pr_err("Invalid dequeue wait cmd\n");
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index e3e635a31b8a..4f79e707bc80 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -313,9 +313,10 @@ struct kfd2kgd_calls {
> void (*get_iq_wait_times)(struct amdgpu_device *adev,
> uint32_t *wait_times,
> uint32_t inst);
> - void (*build_grace_period_packet_info)(struct amdgpu_device *adev,
> + void (*build_dequeue_wait_counts_packet_info)(struct amdgpu_device
> *adev,
> uint32_t wait_times,
> - uint32_t grace_period,
> + uint32_t sch_wave,
> + uint32_t que_sleep,
> uint32_t *reg_offset,
> uint32_t *reg_data);
> void (*get_cu_occupancy)(struct amdgpu_device *adev,
> --
> 2.34.1
More information about the amd-gfx
mailing list