[Intel-gfx] [PATCH] Dump error state if HW wedged

Lukas Hejtmanek xhejtman at ics.muni.cz
Mon Mar 16 11:08:41 CET 2009


Hello,

the patch below adds initial error reporting for the case where the HW is wedged.

I would like to dump the last commands. Could I just remember the address of the
last object in the execbuffer, or can I somehow extract the commands pointed to by
ACTHD and DMA_FADD_P? Are these pointers reliable?

I tried printing the execbuffers as they were executed and I got something like this:
[   69.083602] [drm] i915_gem_execbuffer: object at offset 01310000
[   69.083604] [drm] 01310000: 54300004
[   69.083605] [drm] 01310004: 03f00200
[   69.083606] [drm] 01310008: 00000000
[   69.083607] [drm] 0131000c: 000f006a
[   69.083608] [drm] 01310010: 01d35000
[   69.083609] [drm] 01310014: 00000000
[   69.083610] [drm] 01310018: 54f00006
[   69.083611] [drm] 0131001c: 03cc1000
[   69.083612] [drm] 01310020: 00000330
[   69.083613] [drm] 01310024: 000d033e
[   69.083614] [drm] 01310028: 010f1000
[...]
[   69.096760] [drm] 01310138: 00000000
[   69.096761] [drm] 0131013c: 02000000
[   69.096762] [drm] 01310140: 00000000
[   69.096763] [drm] 01310144: 05000000
[   95.730045] [drm:i915_gem_idle] *ERROR* hardware wedged
[   95.730062] [drm:i965_dump_error_state] *ERROR* Instruction done 0xff65fafd
[   95.730079] [drm:i965_dump_error_state] *ERROR* Active head pointer addr 0x1310140
[   95.730096] [drm:i965_dump_error_state] *ERROR* Current addr in batch/ring buffer 0x1310200


So ACTHD points at the end of this buffer, which is weird: shouldn't it point to
the beginning? And DMA_FADD_P appears to lie past the end of the buffer
altogether. Is that OK?


diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 37427e4..d2c4a16 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3050,6 +3050,79 @@ i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
        return 0;
 }
 
+/*
+ * Dump the i965 error registers to the kernel log once the GPU is wedged.
+ *
+ * @dev: DRM device whose registers are read through I915_READ().
+ *
+ * Every message is a complete line: DRM_ERROR() emits its own prefix on each
+ * call, so newline-less fragments cannot be stitched into a single log line.
+ */
+static void
+i965_dump_error_state(struct drm_device *dev)
+{
+       static const struct {
+               u32 mask;
+               const char *name;
+       } pgtbl_errors[] = {
+               { 1 << 26, "MT_INVALID_GTT_PTE" },
+               { 1 << 24, "LC_INVALID_GTT_PTE" },
+               { 1 << 23, "ISC_INVALID_GTT_PTE" },
+               { 1 << 22, "ROC_INVALID_GTT_PTE" },
+               { 1 << 21, "CS_VertexData_INVALID_GTT_PTE" },
+               { 1 << 20, "CS_Command_INVALID_GTT_PTE" },
+               { 1 << 19, "CS_INVALID_GTT" },
+               { 1 << 18, "CRSR_INVALID_GTT_PTE" },
+               { 1 << 16, "OVRL_INVALID_GTT_PTE" },
+               { 1 << 12, "DISPC_INVALID_GTT_PTE" },
+               { 1 << 8, "DISPB_INVALID_GTT_PTE" },
+               { 1 << 4, "DISPA_INVALID_GTT_PTE" },
+               { 1 << 1, "HOST_INVALID_PTE_DATA" },
+               { 1 << 0, "HOST_INVALID_GTT_PTE" },
+       };
+       /* Not named below; presumably used inside I915_READ() -- confirm. */
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       unsigned int i;
+       u32 val;
+
+       val = I915_READ(PGTBL_ER);
+       if (val)
+               DRM_ERROR("Page table errors (PGTBL_ER 0x%08x):\n", val);
+       for (i = 0; i < ARRAY_SIZE(pgtbl_errors); i++) {
+               if (val & pgtbl_errors[i].mask)
+                       DRM_ERROR("  %s\n", pgtbl_errors[i].name);
+       }
+
+       val = I915_READ(IPEIR_I965);
+       if (val)
+               DRM_ERROR("Instruction parser errors (IPEIR 0x%08x):\n", val);
+       if (val & (1 << 3))
+               DRM_ERROR("  Batch buffer error\n");
+       if (val & 7)
+               DRM_ERROR("  Ring buffer 0 error\n");
+       if (val) {
+               /* IPEHR is only read when IPEIR is non-zero. */
+               val = I915_READ(IPEHR_I965);
+               if (val)
+                       DRM_ERROR("Invalid instruction OP code 0x%x\n", val);
+       }
+
+       /*
+        * 0xffe7fffe is treated as the "nothing to report" value; presumably
+        * it is INSTDONE with every unit idle -- confirm against the docs.
+        */
+       val = I915_READ(INSTDONE);
+       if (val != 0xffe7fffe)
+               DRM_ERROR("Instruction done 0x%x\n", val);
+
+       val = I915_READ(ACTHD_I965);
+       if (val)
+               DRM_ERROR("Active head pointer addr 0x%x\n", val);
+       val = I915_READ(DMA_FADD_P);
+       if (val)
+               DRM_ERROR("Current addr in batch/ring buffer 0x%x\n", val);
+}
+
 int
 i915_gem_idle(struct drm_device *dev)
 {
@@ -3100,6 +3173,9 @@ i915_gem_idle(struct drm_device *dev)
                                DRM_ERROR("hardware wedged\n");
                                dev_priv->mm.wedged = 1;
                                DRM_WAKEUP(&dev_priv->irq_queue);
+                               if(IS_I965G(dev)) {
+                                       i965_dump_error_state(dev);
+                               }
                                break;
                        }
                }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 90600d8..bae5798 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -199,6 +199,7 @@
 #define   I965_FENCE_TILING_Y_SHIFT    1
 #define   I965_FENCE_REG_VALID         (1<<0)
 
+#define PGTBL_ER        0x02024 /* page table error bits */
 /*
  * Instruction and interrupt control regs
  */
@@ -222,7 +223,11 @@
 #define PRB1_HEAD      0x02044 /* 915+ only */
 #define PRB1_START     0x02048 /* 915+ only */
 #define PRB1_CTL       0x0204c /* 915+ only */
+#define IPEIR_I965      0x02064 /* instruction parser error bits */
+#define IPEHR_I965      0x02068 /* instruction logged on parser error */
+#define INSTDONE        0x0206c /* "instruction done" status */
 #define ACTHD_I965     0x02074
+#define DMA_FADD_P      0x02078 /* current addr in batch/ring buffer */
 #define HWS_PGA                0x02080
 #define HWS_ADDRESS_MASK       0xfffff000
 #define HWS_START_ADDRESS_SHIFT        4


-- 
Lukáš Hejtmánek



More information about the Intel-gfx mailing list