[Intel-gfx] [RFC PATCH 04/12] drm/i915: Capture some extra small details in the GPU error state
Oscar Mateo
oscar.mateo at intel.com
Fri Oct 27 18:01:07 UTC 2017
Namely:
- Capture tiling per drm_i915_error_object
- Capture the LRC descriptor per active request
- Capture the wa_batchbuffer unconditionally
- Capture the GAM_ECOCHK register for all GENs
They don't increase the size greatly, and they can be useful even in
the existing GPU error dump (but I will need them for sure in AubCrash).
Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_drv.h | 2 ++
drivers/gpu/drm/i915/i915_gpu_error.c | 25 +++++++++++++++----------
2 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 366ba74..f64871b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -992,6 +992,7 @@ struct i915_gpu_state {
struct drm_i915_error_object {
u64 gtt_offset;
u64 gtt_size;
+ u32 tiling:2;
int page_count;
int unused;
u32 *pages[0];
@@ -1006,6 +1007,7 @@ struct i915_gpu_state {
long jiffies;
pid_t pid;
u32 context;
+ u64 lrc_desc;
int priority;
int ban_score;
u32 seqno;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 653fb69..befd17c 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -897,6 +897,7 @@ void __i915_gpu_state_free(struct kref *error_ref)
dst->gtt_offset = vma->node.start;
dst->gtt_size = vma->node.size;
+ dst->tiling = i915_gem_object_get_tiling(vma->obj);
dst->page_count = 0;
dst->unused = 0;
@@ -1270,16 +1271,21 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
static void record_request(struct drm_i915_gem_request *request,
struct drm_i915_error_request *erq)
{
- erq->context = request->ctx->hw_id;
+ struct i915_gem_context *ctx = request->ctx;
+ struct intel_engine_cs *engine = request->engine;
+ struct intel_context *ce = &ctx->engine[engine->id];
+
+ erq->context = ctx->hw_id;
+ erq->lrc_desc = ce->lrc_desc;
erq->priority = request->priotree.priority;
- erq->ban_score = atomic_read(&request->ctx->ban_score);
+ erq->ban_score = atomic_read(&ctx->ban_score);
erq->seqno = request->global_seqno;
erq->jiffies = request->emitted_jiffies;
erq->head = request->head;
erq->tail = request->tail;
rcu_read_lock();
- erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0;
+ erq->pid = ctx->pid ? pid_nr(ctx->pid) : 0;
rcu_read_unlock();
}
@@ -1442,10 +1448,10 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
i915_error_object_create(dev_priv,
request->batch);
- if (HAS_BROKEN_CS_TLB(dev_priv))
- ee->wa_batchbuffer =
- i915_error_object_create(dev_priv,
- engine->scratch);
+ ee->wa_batchbuffer =
+ i915_error_object_create(dev_priv,
+ engine->scratch);
+
request_record_user_bo(request, ee);
ee->ctx =
@@ -1619,10 +1625,8 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
error->ccid = I915_READ(CCID);
/* 3: Feature specific registers */
- if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
- error->gam_ecochk = I915_READ(GAM_ECOCHK);
+ if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv))
error->gac_eco = I915_READ(GAC_ECO_BITS);
- }
/* 4: Everything else */
if (INTEL_GEN(dev_priv) >= 8) {
@@ -1641,6 +1645,7 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
}
error->eir = I915_READ(EIR);
error->pgtbl_er = I915_READ(PGTBL_ER);
+ error->gam_ecochk = I915_READ(GAM_ECOCHK);
}
static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
--
1.9.1
More information about the Intel-gfx
mailing list