[PATCH 1/2] drm/amdkfd: Add pm_config_dequeue_wait_counts API
Harish Kasiviswanathan
Harish.Kasiviswanathan at amd.com
Wed Feb 26 19:23:02 UTC 2025
Update pm_update_grace_period() to more cleaner
pm_config_dequeue_wait_counts(). Previously, grace_period variable was
overloaded as a variable and a macro, making it inflexible to configure
additional dequeue wait times.
pm_config_dequeue_wait_counts() now takes in a cmd / variable. This
allows flexibility to update different dequeue wait times.
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
---
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 45 +++++++--------
.../drm/amd/amdkfd/kfd_device_queue_manager.h | 11 +++-
.../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 26 ++++++++-
.../drm/amd/amdkfd/kfd_packet_manager_v9.c | 56 ++++++++++++++-----
.../drm/amd/amdkfd/kfd_packet_manager_vi.c | 4 +-
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 28 ++++++++--
6 files changed, 120 insertions(+), 50 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 94b1ac8a4735..cc7465f9562a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -42,6 +42,8 @@
/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
+/* See unmap_queues_cpsch() */
+#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
u32 pasid, unsigned int vmid);
@@ -1745,10 +1747,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
init_sdma_bitmaps(dqm);
- if (dqm->dev->kfd2kgd->get_iq_wait_times)
- dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
- &dqm->wait_times,
- ffs(dqm->dev->xcc_mask) - 1);
+ update_dqm_wait_times(dqm);
return 0;
}
@@ -1844,25 +1843,11 @@ static int start_cpsch(struct device_queue_manager *dqm)
/* clear hang status when driver try to start the hw scheduler */
dqm->sched_running = true;
- if (!dqm->dev->kfd->shared_resources.enable_mes)
+ if (!dqm->dev->kfd->shared_resources.enable_mes) {
+ if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_INIT, 0 /* unused */))
+ dev_err(dev, "Setting optimized dequeue wait failed. Using default values\n");
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
-
- /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
- if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
- (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
- uint32_t reg_offset = 0;
- uint32_t grace_period = 1;
-
- retval = pm_update_grace_period(&dqm->packet_mgr,
- grace_period);
- if (retval)
- dev_err(dev, "Setting grace timeout failed\n");
- else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
- /* Update dqm->wait_times maintained in software */
- dqm->dev->kfd2kgd->build_grace_period_packet_info(
- dqm->dev->adev, dqm->wait_times,
- grace_period, ®_offset,
- &dqm->wait_times);
}
/* setup per-queue reset detection buffer */
@@ -2261,7 +2246,14 @@ static int reset_queues_on_hws_hang(struct device_queue_manager *dqm)
return r;
}
-/* dqm->lock mutex has to be locked before calling this function */
+/* dqm->lock mutex has to be locked before calling this function
+ *
+ * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time
+ * for context switch latency. Lower values are used by debugger
+ * since context switching are triggered at high frequency.
+ * This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE
+ *
+ */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
uint32_t filter_param,
@@ -2280,7 +2272,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
return -EIO;
if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
- retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
+ retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_SET_SCH_WAVE, grace_period);
if (retval)
goto out;
}
@@ -2324,8 +2317,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
/* We need to reset the grace period value for this device */
if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
- if (pm_update_grace_period(&dqm->packet_mgr,
- USE_DEFAULT_GRACE_PERIOD))
+ if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_RESET, 0 /* unused */))
dev_err(dev, "Failed to reset grace period\n");
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 09ab36f8e8c6..917717cfe9c5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -37,7 +37,6 @@
#define KFD_MES_PROCESS_QUANTUM 100000
#define KFD_MES_GANG_QUANTUM 10000
-#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
struct device_process_node {
struct qcm_process_device *qpd;
@@ -359,4 +358,14 @@ static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t *val
/* SDMA activity counter is stored at queue's RPTR + 0x8 location. */
return get_user(*val, q_rptr + 1);
}
+
+static inline void update_dqm_wait_times(struct device_queue_manager *dqm)
+{
+ if (dqm->dev->kfd2kgd->get_iq_wait_times)
+ dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
+ &dqm->wait_times,
+ ffs(dqm->dev->xcc_mask) - 1);
+}
+
+
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 4984b41cd372..dd19ae40f0ba 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -396,14 +396,29 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
return retval;
}
-int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
+/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts
+ * by writing to CP_IQ_WAIT_TIME2 registers.
+ *
+ * @cmd: See emum kfd_config_dequeue_wait_counts_cmd definition
+ * @value: Depends on the cmd. This parameter is unused for
+ * KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For
+ * KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds value to be set
+ *
+ */
+int pm_config_dequeue_wait_counts(struct packet_manager *pm,
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t value)
{
struct kfd_node *node = pm->dqm->dev;
struct device *dev = node->adev->dev;
int retval = 0;
uint32_t *buffer, size;
- size = pm->pmf->set_grace_period_size;
+ if (!pm->pmf->config_dequeue_wait_counts ||
+ !pm->pmf->config_dequeue_wait_counts_size)
+ return 0;
+
+ size = pm->pmf->config_dequeue_wait_counts_size;
mutex_lock(&pm->lock);
@@ -419,13 +434,18 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
goto out;
}
- retval = pm->pmf->set_grace_period(pm, buffer, grace_period);
+ retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
+ cmd, value);
if (!retval)
retval = kq_submit_packet(pm->priv_queue);
else
kq_rollback_packet(pm->priv_queue);
}
+ /* If default value is modified, cache that value in dqm->wait_times */
+ if (cmd == KFD_DEQUEUE_WAIT_INIT)
+ update_dqm_wait_times(pm->dqm);
+
out:
mutex_unlock(&pm->lock);
return retval;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index d56525201155..9cb21af1d0af 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -297,23 +297,51 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
-static int pm_set_grace_period_v9(struct packet_manager *pm,
+static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm,
+ uint32_t sch_value, uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
+ pm->dqm->dev->adev,
+ pm->dqm->wait_times,
+ sch_value,
+ reg_offset,
+ reg_data);
+}
+
+static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
uint32_t *buffer,
- uint32_t grace_period)
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t value)
{
struct pm4_mec_write_data_mmio *packet;
uint32_t reg_offset = 0;
uint32_t reg_data = 0;
- pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
- pm->dqm->dev->adev,
- pm->dqm->wait_times,
- grace_period,
- ®_offset,
- ®_data);
-
- if (grace_period == USE_DEFAULT_GRACE_PERIOD)
+ switch (cmd) {
+ case KFD_DEQUEUE_WAIT_INIT:
+ /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
+ if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
+ (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
+ pm_build_dequeue_wait_counts_packet_info(pm, 1, ®_offset, ®_data);
+ else
+ return 0;
+ break;
+ case KFD_DEQUEUE_WAIT_RESET:
+ /* function called only to get reg_offset */
+ pm_build_dequeue_wait_counts_packet_info(pm, 0, ®_offset, ®_data);
reg_data = pm->dqm->wait_times;
+ break;
+
+ case KFD_DEQUEUE_WAIT_SET_SCH_WAVE:
+ /* The CP cannot handle value 0 and it will result in
+ * an infinite grace period being set so set to 1 to prevent this.
+ */
+ if (!value)
+ value = 1;
+ pm_build_dequeue_wait_counts_packet_info(pm, value, ®_offset, ®_data);
+ break;
+ }
packet = (struct pm4_mec_write_data_mmio *)buffer;
memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio));
@@ -415,7 +443,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
- .set_grace_period = pm_set_grace_period_v9,
+ .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9,
.query_status = pm_query_status_v9,
.release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process),
@@ -423,7 +451,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
- .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
+ .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};
@@ -434,7 +462,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
- .set_grace_period = pm_set_grace_period_v9,
+ .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9,
.query_status = pm_query_status_v9,
.release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process_aldebaran),
@@ -442,7 +470,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
- .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
+ .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
index 347c86e1c378..a1de5d7e173a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -304,7 +304,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources = pm_set_resources_vi,
.map_queues = pm_map_queues_vi,
.unmap_queues = pm_unmap_queues_vi,
- .set_grace_period = NULL,
+ .config_dequeue_wait_counts = NULL,
.query_status = pm_query_status_vi,
.release_mem = pm_release_mem_vi,
.map_process_size = sizeof(struct pm4_mes_map_process),
@@ -312,7 +312,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
- .set_grace_period_size = 0,
+ .config_dequeue_wait_counts_size = 0,
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = sizeof(struct pm4_mec_release_mem)
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 59619f794b6b..4c8026947a73 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1392,6 +1392,24 @@ int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
#define KFD_FENCE_COMPLETED (100)
#define KFD_FENCE_INIT (10)
+/**
+ * enum kfd_config_dequeue_wait_counts_cmd - Command for configuring
+ * dequeue wait counts.
+ *
+ * @KFD_DEQUEUE_WAIT_INIT: Set optimized dequeue wait counts for a
+ * certain ASICs. For these ASICs, this is default value used by RESET
+ * @KFD_DEQUEUE_WAIT_RESET: Reset dequeue wait counts to the optimized value
+ * for certain ASICs. For others set it to default hardware reset value
+ * @KFD_DEQUEUE_WAIT_SET_SCH_WAVE: Set context switch latency wait
+ *
+ */
+enum kfd_config_dequeue_wait_counts_cmd {
+ KFD_DEQUEUE_WAIT_INIT = 1,
+ KFD_DEQUEUE_WAIT_RESET = 2,
+ KFD_DEQUEUE_WAIT_SET_SCH_WAVE = 3
+};
+
+
struct packet_manager {
struct device_queue_manager *dqm;
struct kernel_queue *priv_queue;
@@ -1417,8 +1435,8 @@ struct packet_manager_funcs {
int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
enum kfd_unmap_queues_filter mode,
uint32_t filter_param, bool reset);
- int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
- uint32_t grace_period);
+ int (*config_dequeue_wait_counts)(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_config_dequeue_wait_counts_cmd cmd, uint32_t value);
int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
uint64_t fence_address, uint64_t fence_value);
int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
@@ -1429,7 +1447,7 @@ struct packet_manager_funcs {
int set_resources_size;
int map_queues_size;
int unmap_queues_size;
- int set_grace_period_size;
+ int config_dequeue_wait_counts_size;
int query_status_size;
int release_mem_size;
};
@@ -1452,7 +1470,9 @@ int pm_send_unmap_queue(struct packet_manager *pm,
void pm_release_ib(struct packet_manager *pm);
-int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period);
+int pm_config_dequeue_wait_counts(struct packet_manager *pm,
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t wait_counts_config);
/* Following PM funcs can be shared among VI and AI */
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
--
2.34.1
More information about the amd-gfx
mailing list