[PATCH 4/4] drm/amdgpu: Set lower queue retry timeout for gfx9 family
Harish Kasiviswanathan
Harish.Kasiviswanathan at amd.com
Wed Feb 12 22:03:41 UTC 2025
Set a lower, better-tuned queue retry timeout (CP_IQ_WAIT_TIME2.QUE_SLEEP)
for the gfx9 family, starting with Arcturus.
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
---
.../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 7 ++++++
.../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h | 1 +
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 8 +++++-
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 1 +
.../drm/amd/amdkfd/kfd_packet_manager_v9.c | 25 +++++++++++++++----
.../gpu/drm/amd/include/kgd_kfd_interface.h | 1 +
6 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 8e72dcff8867..e62f5dc2529d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -1024,6 +1024,7 @@ void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data)
{
@@ -1035,6 +1036,12 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
SCH_WAVE,
grace_period);
}
+ if (que_sleep) {
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
+ }
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
index 9efd2dd4fdd7..11aedaa8a0b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
@@ -54,6 +54,7 @@ void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 04c86a229a23..3f74307d466d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -1080,6 +1080,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data)
{
@@ -1091,7 +1092,12 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
SCH_WAVE,
grace_period);
}
-
+ if (que_sleep) {
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
+ }
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index b6a91a552aa4..3f159d477f5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -100,6 +100,7 @@ void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index ecabf95d972f..a1b615dc9a19 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -305,13 +305,27 @@ static int pm_set_compute_queue_wait_counts_v9(struct packet_manager *pm,
struct device_queue_manager *dqm = pm->dqm;
uint32_t reg_offset = 0;
uint32_t reg_data = 0;
+ uint32_t que_sleep = 0;
if (wait_counts_config == KFD_INIT_CP_QUEUE_WAIT_TIMES) {
- /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
- if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
- KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))
- wait_counts_config = 1;
- else
+ /*
+ * For GFX9 ASICs starting with GC 9.4.1 (and below GC 10.0.0) -
+ * Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
+ * On a 1GHz machine this is roughly 1 microsecond, which is
+ * about how long it takes to load data out of memory during
+ * queue connect
+ * QUE_SLEEP: Wait Count for Dequeue Retry.
+ */
+ if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 4, 1) &&
+ KFD_GC_VERSION(dqm->dev) < IP_VERSION(10, 0, 0)) {
+ que_sleep = 1;
+ wait_counts_config = 0; /* use default dqm->wait_times */
+
+ /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
+ if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))
+ wait_counts_config = 1;
+ } else
return 0;
} else if (!wait_counts_config)
/*
@@ -328,6 +342,7 @@ static int pm_set_compute_queue_wait_counts_v9(struct packet_manager *pm,
pm->dqm->dev->adev,
pm->dqm->wait_times,
wait_counts_config,
+ que_sleep,
&reg_offset,
&reg_data);
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index e3e635a31b8a..1ed3fbedf50b 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -316,6 +316,7 @@ struct kfd2kgd_calls {
void (*build_grace_period_packet_info)(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
void (*get_cu_occupancy)(struct amdgpu_device *adev,
--
2.34.1
More information about the amd-gfx
mailing list