[PATCH 1/4] drm/amdgpu: Improve error checking in amdgpu_virt_rlcg_reg_rw
Victor Lu
victorchengchi.lu at amd.com
Tue Jan 2 17:30:10 UTC 2024
The current error detection only looks for a timeout.
This should be changed to also check scratch_reg1 for any errors
returned from RLCG.
Also add a new error value.
Signed-off-by: Victor Lu <victorchengchi.lu at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 ++++++--
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 ++
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 0dcff2889e25..3cd085569515 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -1022,7 +1022,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
* SCRATCH_REG0 = read/write value
* SCRATCH_REG1[30:28] = command
* SCRATCH_REG1[19:0] = address in dword
- * SCRATCH_REG1[26:24] = Error reporting
+ * SCRATCH_REG1[27:24] = Error reporting
*/
writel(v, scratch_reg0);
writel((offset | flag), scratch_reg1);
@@ -1036,7 +1036,8 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
udelay(10);
}
- if (i >= timeout) {
+ tmp = readl(scratch_reg1);
+ if (i >= timeout || (tmp & AMDGPU_RLCG_SCRATCH1_ERROR_MASK) != 0) {
if (amdgpu_sriov_rlcg_error_report_enabled(adev)) {
if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) {
dev_err(adev->dev,
@@ -1047,6 +1048,9 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
} else if (tmp & AMDGPU_RLCG_REG_NOT_IN_RANGE) {
dev_err(adev->dev,
"register is not in range, rlcg failed to program reg: 0x%05x\n", offset);
+ } else if (tmp & AMDGPU_RLCG_INVALID_XCD_ACCESS) {
+ dev_err(adev->dev,
+ "invalid xcd access, rlcg failed to program reg: 0x%05x\n", offset);
} else {
dev_err(adev->dev,
"unknown error type, rlcg failed to program reg: 0x%05x\n", offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index d4207e44141f..447af2e4aef0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -40,11 +40,13 @@
#define AMDGPU_RLCG_MMHUB_WRITE (0x2 << 28)
/* error code for indirect register access path supported by rlcg for sriov */
+#define AMDGPU_RLCG_INVALID_XCD_ACCESS 0x8000000
#define AMDGPU_RLCG_VFGATE_DISABLED 0x4000000
#define AMDGPU_RLCG_WRONG_OPERATION_TYPE 0x2000000
#define AMDGPU_RLCG_REG_NOT_IN_RANGE 0x1000000
#define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF
+#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK 0xF000000
/* all asic after AI use this offset */
#define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
--
2.34.1
More information about the amd-gfx
mailing list