[PATCH 07/11] drm/amdkfd: add gc 9.5.0 support on kfd
Alex Deucher
alexander.deucher at amd.com
Mon Dec 2 21:49:48 UTC 2024
From: Alex Sierra <alex.sierra at amd.com>
Initial support for GC 9.5.0.
v2: squash in pqm_clean_queue_resource() fix from Lijo
Signed-off-by: Alex Sierra <alex.sierra at amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 1 +
drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 1 +
drivers/gpu/drm/amd/amdkfd/kfd_device.c | 43 +++++++++++--------
.../amd/amdkfd/kfd_device_queue_manager_v9.c | 3 +-
.../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 13 ++++--
.../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 3 +-
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 ++-
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 5 ++-
.../amd/amdkfd/kfd_process_queue_manager.c | 13 +++---
drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 3 +-
10 files changed, 57 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 7b826a136ceb9..b666598ccd0e5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1628,6 +1628,7 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
num_of_cache_types =
kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd,
*pcache_info);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index 924d0fd85dfb8..27aa1a5b120ff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -79,6 +79,7 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0) ||
KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 9b51dd75fefc7..b58f798bf2d56 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -85,6 +85,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(4, 4, 0):/* ALDEBARAN */
case IP_VERSION(4, 4, 2):
case IP_VERSION(4, 4, 5):
+ case IP_VERSION(4, 4, 4):
case IP_VERSION(5, 0, 0):/* NAVI10 */
case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
case IP_VERSION(5, 0, 2):/* NAVI14 */
@@ -152,6 +153,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
break;
case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
case IP_VERSION(9, 4, 4): /* GC 9.4.4 */
+ case IP_VERSION(9, 5, 0): /* GC 9.5.0 */
kfd->device_info.event_interrupt_class =
&event_interrupt_class_v9_4_3;
break;
@@ -356,6 +358,10 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
gfx_target_version = 90402;
f2g = &gc_9_4_3_kfd2kgd;
break;
+ case IP_VERSION(9, 5, 0):
+ gfx_target_version = 90500;
+ f2g = &gc_9_4_3_kfd2kgd;
+ break;
/* Navi10 */
case IP_VERSION(10, 1, 10):
gfx_target_version = 100100;
@@ -515,6 +521,10 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
> KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 5, 0)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex)
> KFD_CWSR_TMA_OFFSET);
@@ -567,6 +577,7 @@ static int kfd_gws_init(struct kfd_node *node)
&& kfd->mec2_fw_version >= 0x28) ||
(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3) ||
KFD_GC_VERSION(node) == IP_VERSION(9, 4, 4)) ||
+ (KFD_GC_VERSION(node) == IP_VERSION(9, 5, 0)) ||
(KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
&& kfd->mec2_fw_version >= 0x6b) ||
@@ -733,14 +744,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
- /* For GFX9.4.3, we need special handling for VMIDs depending on
- * partition mode.
+ /* For multi-partition capable GPUs, we need special handling for VMIDs
+ * depending on partition mode.
* In CPX mode, the VMID range needs to be shared between XCDs.
* Additionally, there are 13 VMIDs (3-15) available for KFD. To
* divide them equally, we change starting VMID to 4 and not use
* VMID 3.
- * If the VMID range changes for GFX9.4.3, then this code MUST be
- * revisited.
+ * If the VMID range changes for multi-partition capable GPUs, then
+ * this code MUST be revisited.
*/
if (kfd->adev->xcp_mgr) {
partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr,
@@ -805,14 +816,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
/*
- * For GFX9.4.3, the KFD abstracts all partitions within a socket as
- * xGMI connected in the topology so assign a unique hive id per
- * device based on the pci device location if device is in PCIe mode.
+ * For multi-partition capable GPUs, the KFD abstracts all partitions
+ * within a socket as xGMI connected in the topology so assign a unique
+ * hive id per device based on the pci device location if device is in
+ * PCIe mode.
*/
- if (!kfd->hive_id &&
- (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) &&
- kfd->num_nodes > 1)
+ if (!kfd->hive_id && kfd->num_nodes > 1)
kfd->hive_id = pci_dev_id(kfd->adev->pdev);
kfd->noretry = kfd->adev->gmc.noretry;
@@ -850,12 +859,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20);
}
- if ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) &&
- partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+ if (partition_mode == AMDGPU_CPX_PARTITION_MODE &&
kfd->num_nodes != 1) {
- /* For GFX9.4.3 and CPX mode, first XCD gets VMID range
- * 4-9 and second XCD gets VMID range 10-15.
+ /* For multi-partition capable GPUs and CPX mode, first
+ * XCD gets VMID range 4-9 and second XCD gets VMID
+ * range 10-15.
*/
node->vm_info.first_vmid_kfd = (i%2 == 0) ?
@@ -879,8 +887,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
amdgpu_amdkfd_get_local_mem_info(kfd->adev,
&node->local_mem_info, node->xcp);
- if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4))
+ if (kfd->adev->xcp_mgr)
kfd_setup_interrupt_bitmap(node, i);
/* Initialize the KFD node */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index 210bcc048f4c5..67137e674f1d0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -64,7 +64,8 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0))
qpd->sh_mem_config |=
(1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 84e8ea3a8a0c9..ff417d5361c42 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -78,7 +78,8 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se2 = se_mask[2];
m->compute_static_thread_mgmt_se3 = se_mask[3];
if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) {
m->compute_static_thread_mgmt_se4 = se_mask[4];
m->compute_static_thread_mgmt_se5 = se_mask[5];
m->compute_static_thread_mgmt_se6 = se_mask[6];
@@ -301,7 +302,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_ctx_save_control = 0;
if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0))
update_cu_mask(mm, mqd, minfo, 0);
set_priority(m, q);
@@ -885,7 +887,8 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
mqd->init_mqd = init_mqd_v9_4_3;
mqd->load_mqd = load_mqd_v9_4_3;
mqd->update_mqd = update_mqd_v9_4_3;
@@ -909,8 +912,10 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
+ mqd->check_preemption_failed = check_preemption_failed;
if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
mqd->init_mqd = init_mqd_hiq_v9_4_3;
mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3;
mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index cac706dd66ed8..4984b41cd3721 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -260,7 +260,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
default:
if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
pm->pmf = &kfd_aldebaran_pm_funcs;
else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
pm->pmf = &kfd_v9_pm_funcs;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 9e5ca0b93b2a2..800e4ae5b5735 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -207,7 +207,8 @@ enum cache_policy {
#define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\
((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) || \
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) || \
- (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)))
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) || \
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)))
struct kfd_node;
@@ -1150,7 +1151,8 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
uint32_t i;
if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0))
return dev->nodes[0];
for (i = 0; i < dev->num_nodes; i++)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 87cd52cf4ee99..785d68b76f694 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -2146,10 +2146,11 @@ int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
irq_drain_fence[3] = pdd->process->pasid;
/*
- * For GFX 9.4.3, send the NodeId also in IH cookie DW[3]
+ * For GFX 9.4.3/9.5.0, send the NodeId also in IH cookie DW[3]
*/
if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 5, 0)) {
node_id = ffs(pdd->dev->interrupt_bitmap) - 1;
irq_drain_fence[3] |= node_id << 16;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index c76db22a10005..1c3c0a686df29 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -131,8 +131,9 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL;
- if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) &&
!dev->kfd->shared_resources.enable_mes) {
if (gws)
ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
@@ -197,6 +198,7 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
if (pqn->q->gws) {
if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) &&
!dev->kfd->shared_resources.enable_mes)
amdgpu_amdkfd_remove_gws_from_process(
pqm->process->kgd_process_info, pqn->q->gws);
@@ -316,11 +318,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
unsigned int max_queues = 127; /* HWS limit */
/*
- * On GFX 9.4.3, increase the number of queues that
- * can be created to 255. No HWS limit on GFX 9.4.3.
+ * On GFX 9.4.3/9.5.0, increase the number of queues that
+ * can be created to 255. No HWS limit on GFX 9.4.3/9.5.0.
*/
if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))
max_queues = 255;
q = NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 9476e30d6baa1..52c6c5e9d264e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1714,7 +1714,8 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
pcache->cacheline_size = pcache_info[cache_type].cache_line_size;
if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(knode) == IP_VERSION(9, 5, 0))
mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
else
mode = UNKNOWN_MEMORY_PARTITION_MODE;
--
2.47.0
More information about the amd-gfx
mailing list