[PATCH 5/5] drm/amdgpu: skip GFX FED error in page fault handling
Tao Zhou
tao.zhou1 at amd.com
Fri Feb 23 08:23:52 UTC 2024
Let the KFD interrupt handler process the GFX FED error instead.
v2: return 0 instead of 1 for the FED error;
    drop the usage of strcmp in the interrupt handler.
Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
---
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 001e96d89cd7..09364817ae97 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -552,7 +552,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
{
bool retry_fault = !!(entry->src_data[1] & 0x80);
bool write_fault = !!(entry->src_data[1] & 0x20);
- uint32_t status = 0, cid = 0, rw = 0;
+ uint32_t status = 0, cid = 0, rw = 0, fed = 0;
struct amdgpu_task_info task_info;
struct amdgpu_vmhub *hub;
const char *mmhub_cid;
@@ -663,6 +663,14 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
status = RREG32(hub->vm_l2_pro_fault_status);
cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
+ fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
+
+ /* for gfx fed error, kfd will handle it, return directly */
+ if (fed && amdgpu_ras_is_poison_mode_supported(adev) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2)) &&
+ (vmhub < AMDGPU_MMHUB0_START))
+ return 0;
+
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
#ifdef HAVE_STRUCT_XARRAY
amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);
--
2.34.1
More information about the amd-gfx mailing list