[PATCH 1/3] drm/radeon: add fault decode function for cayman/TN (v2)
alexdeucher at gmail.com
alexdeucher at gmail.com
Thu Jul 11 13:58:17 PDT 2013
From: Alex Deucher <alexander.deucher at amd.com>
Helpful for debugging GPUVM errors as we can see what
hw block and page generated the fault in the log.
v2: simplify fault decoding
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
Reviewed-by: Christian König <christian.koenig at amd.com>
---
drivers/gpu/drm/radeon/evergreen.c | 10 ++-
drivers/gpu/drm/radeon/ni.c | 161 ++++++++++++++++++++++++++++++++++++
drivers/gpu/drm/radeon/nid.h | 16 ++++
3 files changed, 185 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 9e14007..bc6936a 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -139,6 +139,8 @@ void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
void evergreen_program_aspm(struct radeon_device *rdev);
extern void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
int ring, u32 cp_int_cntl);
+extern void cayman_vm_decode_fault(struct radeon_device *rdev,
+ u32 status, u32 addr);
static const u32 evergreen_golden_registers[] =
{
@@ -4629,6 +4631,7 @@ int evergreen_irq_process(struct radeon_device *rdev)
bool queue_hotplug = false;
bool queue_hdmi = false;
bool queue_thermal = false;
+ u32 status, addr;
if (!rdev->ih.enabled || rdev->shutdown)
return IRQ_NONE;
@@ -4915,11 +4918,14 @@ restart_ih:
break;
case 146:
case 147:
+ addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
+ status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
- RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+ addr);
dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
- RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+ status);
+ cayman_vm_decode_fault(rdev, status, addr);
/* reset addr and status */
WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
break;
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 465b17e..56bd4f3 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -2450,6 +2450,167 @@ void cayman_vm_fini(struct radeon_device *rdev)
{
}
+/**
+ * cayman_vm_decode_fault - print human readable fault info
+ *
+ * @rdev: radeon_device pointer
+ * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
+ * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
+ *
+ * Print human readable fault information (cayman/TN).
+ */
+void cayman_vm_decode_fault(struct radeon_device *rdev,
+ u32 status, u32 addr)
+{
+ u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
+ u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
+ u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
+ char *block;
+
+ switch (mc_id) {
+ case 32:
+ case 16:
+ case 96:
+ case 80:
+ case 160:
+ case 144:
+ case 224:
+ case 208:
+ block = "CB";
+ break;
+ case 33:
+ case 17:
+ case 97:
+ case 81:
+ case 161:
+ case 145:
+ case 225:
+ case 209:
+ block = "CB_FMASK";
+ break;
+ case 34:
+ case 18:
+ case 98:
+ case 82:
+ case 162:
+ case 146:
+ case 226:
+ case 210:
+ block = "CB_CMASK";
+ break;
+ case 35:
+ case 19:
+ case 99:
+ case 83:
+ case 163:
+ case 147:
+ case 227:
+ case 211:
+ block = "CB_IMMED";
+ break;
+ case 36:
+ case 20:
+ case 100:
+ case 84:
+ case 164:
+ case 148:
+ case 228:
+ case 212:
+ block = "DB";
+ break;
+ case 37:
+ case 21:
+ case 101:
+ case 85:
+ case 165:
+ case 149:
+ case 229:
+ case 213:
+ block = "DB_HTILE";
+ break;
+ case 38:
+ case 22:
+ case 102:
+ case 86:
+ case 166:
+ case 150:
+ case 230:
+ case 214:
+ block = "SX";
+ break;
+ case 39:
+ case 23:
+ case 103:
+ case 87:
+ case 167:
+ case 151:
+ case 231:
+ case 215:
+ block = "DB_STEN";
+ break;
+ case 40:
+ case 24:
+ case 104:
+ case 88:
+ case 232:
+ case 216:
+ case 168:
+ case 152:
+ block = "TC_TFETCH";
+ break;
+ case 41:
+ case 25:
+ case 105:
+ case 89:
+ case 233:
+ case 217:
+ case 169:
+ case 153:
+ block = "TC_VFETCH";
+ break;
+ case 42:
+ case 26:
+ case 106:
+ case 90:
+ case 234:
+ case 218:
+ case 170:
+ case 154:
+ block = "VC";
+ break;
+ case 112:
+ block = "CP";
+ break;
+ case 113:
+ case 114:
+ block = "SH";
+ break;
+ case 115:
+ block = "VGT";
+ break;
+ case 178:
+ block = "IH";
+ break;
+ case 51:
+ block = "RLC";
+ break;
+ case 55:
+ block = "DMA";
+ break;
+ case 56:
+ block = "HDP";
+ break;
+ default:
+ block = "unknown";
+ break;
+ }
+
+ printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
+ protections, vmid, addr,
+ (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
+ block, mc_id);
+}
+
#define R600_ENTRY_VALID (1 << 0)
#define R600_PTE_SYSTEM (1 << 1)
#define R600_PTE_SNOOPED (1 << 2)
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index fe24a93..22421bc 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -133,6 +133,22 @@
#define VM_CONTEXT1_CNTL2 0x1434
#define VM_INVALIDATE_REQUEST 0x1478
#define VM_INVALIDATE_RESPONSE 0x147c
+#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x14FC
+#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x14DC
+#define PROTECTIONS_MASK (0xf << 0)
+#define PROTECTIONS_SHIFT 0
+ /* bit 0: range
+ * bit 2: pde0
+ * bit 3: valid
+ * bit 4: read
+ * bit 5: write
+ */
+#define MEMORY_CLIENT_ID_MASK (0xff << 12)
+#define MEMORY_CLIENT_ID_SHIFT 12
+#define MEMORY_CLIENT_RW_MASK (1 << 24)
+#define MEMORY_CLIENT_RW_SHIFT 24
+#define FAULT_VMID_MASK (0x7 << 25)
+#define FAULT_VMID_SHIFT 25
#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x1518
#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR 0x151c
#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x153C
--
1.7.7.5
More information about the dri-devel
mailing list