[PATCH] drm/amdgpu: update SDMA sysfs reset mask in late_init
jesse.zhang at amd.com
jesse.zhang at amd.com
Mon Feb 24 02:46:35 UTC 2025
From: "Jesse.zhang at amd.com" <Jesse.zhang at amd.com>
- Added `sdma_v4_4_2_update_reset_mask` function to update the reset mask.
- update the sysfs reset mask to the `late_init` stage to ensure that the SMU initialization
and capability setup are completed before checking the SDMA reset capability.
- For IP versions 9.4.3 and 9.4.4, enable per-queue reset if the MEC firmware version is at least 0xb0 and PMFW supports queue reset.
- Add a TODO comment for future support of per-queue reset for IP version 9.5.0.
This change ensures that per-queue reset is only enabled when the MEC and PMFW support it.
v2: fix ip version (9.5.4 -> 9.5.0)(Lijo)
Suggested-by: Jonathan Kim <Jonathan.Kim at amd.com>
Signed-off-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 37 +++++++++++++++++++++++-
1 file changed, 36 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 4fa688e00f5e..ba43c8f46f45 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -107,6 +107,7 @@ static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev);
static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
u32 instance, u32 offset)
@@ -1374,6 +1375,12 @@ static int sdma_v4_4_2_late_init(struct amdgpu_ip_block *ip_block)
if (!amdgpu_persistent_edc_harvesting_supported(adev))
amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA);
+ /* The initialization is done in the late_init stage to ensure that the SMU
+ * initialization and capability setup are completed before we check the SDMA
+ * reset capability
+ */
+ sdma_v4_4_2_update_reset_mask(adev);
+
return 0;
}
@@ -1481,7 +1488,6 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
}
}
- /* TODO: Add queue reset mask when FW fully supports it */
adev->sdma.supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
@@ -2328,6 +2334,35 @@ static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev)
adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
+/**
+ * sdma_v4_4_2_update_reset_mask - update reset mask for SDMA
+ * @adev: Pointer to the AMDGPU device structure
+ *
+ * This function update reset mask for SDMA and sets the supported
+ * reset types based on the IP version and firmware versions.
+ *
+ */
+static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
+{
+
+ /*
+ * the user queue relies on MEC fw and pmfw when the sdma queue do reset.
+ * it needs to check both of them at here to skip old mec and pmfw.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ if ((adev->gfx.mec_fw_version >= 0xb0) && amdgpu_dpm_reset_sdma_is_supported(adev))
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ case IP_VERSION(9, 5, 0):
+ /*TODO: enable the queue reset flag until fw supported */
+ default:
+ break;
+ }
+
+}
+
const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = {
.type = AMD_IP_BLOCK_TYPE_SDMA,
.major = 4,
--
2.25.1
More information about the amd-gfx
mailing list