[Intel-gfx] [PATCH 119/190] drm/i915: Reduce amount of duplicate buffer information captured on error
Chris Wilson
chris at chris-wilson.co.uk
Mon Jan 11 02:45:03 PST 2016
When capturing the error state, we do not need to know about every
address space - just those that are related to the error. We know which
context is active at the time, therefore we know which VM are implicated
in the error. We can then restrict the VM which we report to the
relevant subset.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_drv.h | 9 +-
drivers/gpu/drm/i915/i915_gpu_error.c | 197 ++++++++++++++--------------------
2 files changed, 87 insertions(+), 119 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1ecff535973e..f1447dbb3c55 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -519,6 +519,7 @@ struct drm_i915_error_state {
bool waiting;
int hangcheck_score;
enum intel_engine_hangcheck_action hangcheck_action;
+ struct i915_address_space *vm;
int num_requests;
/* our own tracking of ring head and tail */
@@ -579,17 +580,15 @@ struct drm_i915_error_state {
u32 read_domains;
u32 write_domain;
s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
- s32 pinned:2;
u32 tiling:2;
u32 dirty:1;
u32 purgeable:1;
u32 userptr:1;
s32 ring:4;
u32 cache_level:3;
- } **active_bo, **pinned_bo;
-
- u32 *active_bo_count, *pinned_bo_count;
- u32 vm_count;
+ } *active_bo[I915_NUM_RINGS], *pinned_bo;
+ u32 active_bo_count[I915_NUM_RINGS], pinned_bo_count;
+ struct i915_address_space *active_vm[I915_NUM_RINGS];
};
struct intel_connector;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 706d956b6eb3..98d0a6a53cc7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -42,16 +42,6 @@ static const char *ring_str(int ring)
}
}
-static const char *pin_flag(int pinned)
-{
- if (pinned > 0)
- return " P";
- else if (pinned < 0)
- return " p";
- else
- return "";
-}
-
static const char *tiling_flag(int tiling)
{
switch (tiling) {
@@ -189,7 +179,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
{
int i;
- err_printf(m, " %s [%d]:\n", name, count);
+ err_printf(m, "%s [%d]:\n", name, count);
while (count--) {
err_printf(m, " %08x_%08x %8u %02x %02x [ ",
@@ -202,7 +192,6 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
err_printf(m, "%02x ", err->rseqno[i]);
err_printf(m, "] %02x", err->wseqno);
- err_puts(m, pin_flag(err->pinned));
err_puts(m, tiling_flag(err->tiling));
err_puts(m, dirty_flag(err->dirty));
err_puts(m, purgeable_flag(err->purgeable));
@@ -414,18 +403,25 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
for (i = 0; i < ARRAY_SIZE(error->ring); i++)
i915_ring_error_state(m, dev, error, i);
- for (i = 0; i < error->vm_count; i++) {
- err_printf(m, "vm[%d]\n", i);
+ for (i = 0; i < I915_NUM_RINGS; i++) {
+ if (error->active_vm[i] == NULL)
+ break;
- print_error_buffers(m, "Active",
+ err_printf(m, "Active vm[%d]\n", i);
+ for (j = 0; j < I915_NUM_RINGS; j++) {
+ if (error->ring[j].vm == error->active_vm[i])
+ err_printf(m, " %s\n",
+ dev_priv->ring[j].name);
+ }
+ print_error_buffers(m, " Buffers",
error->active_bo[i],
error->active_bo_count[i]);
-
- print_error_buffers(m, "Pinned",
- error->pinned_bo[i],
- error->pinned_bo_count[i]);
}
+ print_error_buffers(m, "Pinned (global)",
+ error->pinned_bo,
+ error->pinned_bo_count);
+
for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
obj = error->ring[i].batchbuffer;
if (obj) {
@@ -585,13 +581,10 @@ static void i915_error_state_free(struct kref *error_ref)
i915_error_object_free(error->semaphore_obj);
- for (i = 0; i < error->vm_count; i++)
+ for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
kfree(error->active_bo[i]);
-
- kfree(error->active_bo);
- kfree(error->active_bo_count);
kfree(error->pinned_bo);
- kfree(error->pinned_bo_count);
+
kfree(error->overlay);
kfree(error->display);
kfree(error);
@@ -714,9 +707,6 @@ static void capture_bo(struct drm_i915_error_buffer *err,
err->read_domains = obj->base.read_domains;
err->write_domain = obj->base.write_domain;
err->fence_reg = obj->fence_reg;
- err->pinned = 0;
- if (i915_gem_obj_is_pinned(obj))
- err->pinned = 1;
err->tiling = obj->tiling_mode;
err->dirty = obj->dirty;
err->purgeable = obj->madv != I915_MADV_WILLNEED;
@@ -725,13 +715,17 @@ static void capture_bo(struct drm_i915_error_buffer *err,
err->cache_level = obj->cache_level;
}
-static u32 capture_active_bo(struct drm_i915_error_buffer *err,
- int count, struct list_head *head)
+static u32 capture_error_bo(struct drm_i915_error_buffer *err,
+ int count, struct list_head *head,
+ bool pinned_only)
{
struct i915_vma *vma;
int i = 0;
list_for_each_entry(vma, head, vm_link) {
+ if (pinned_only && !vma->pin_count)
+ continue;
+
capture_bo(err++, vma);
if (++i == count)
break;
@@ -740,28 +734,6 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
return i;
}
-static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
- int count, struct list_head *head,
- struct i915_address_space *vm)
-{
- struct drm_i915_gem_object *obj;
- struct drm_i915_error_buffer * const first = err;
- struct drm_i915_error_buffer * const last = err + count;
-
- list_for_each_entry(obj, head, global_list) {
- struct i915_vma *vma;
-
- if (err == last)
- break;
-
- list_for_each_entry(vma, &obj->vma_list, obj_link)
- if (vma->vm == vm && vma->pin_count > 0)
- capture_bo(err++, vma);
- }
-
- return err - first;
-}
-
/* Generate a semi-unique error code. The code is not meant to have meaning, The
* code's only purpose is to try to prevent false duplicated bug reports by
* grossly estimating a GPU error state.
@@ -1009,9 +981,10 @@ static void i915_gem_record_rings(struct drm_device *dev,
struct i915_address_space *vm;
struct intel_ring *ring;
- vm = request->ctx && request->ctx->ppgtt ?
+ vm = request->ctx->ppgtt ?
&request->ctx->ppgtt->base :
&dev_priv->gtt.base;
+ error->ring[i].vm = vm;
/* We need to copy these to an anonymous buffer
* as the simplest method to avoid being overwritten
@@ -1099,89 +1072,83 @@ static void i915_gem_record_rings(struct drm_device *dev,
}
}
-/* FIXME: Since pin count/bound list is global, we duplicate what we capture per
- * VM.
- */
static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error,
struct i915_address_space *vm,
const int ndx)
{
- struct drm_i915_error_buffer *active_bo = NULL, *pinned_bo = NULL;
- struct drm_i915_gem_object *obj;
+ struct drm_i915_error_buffer *active_bo;
struct i915_vma *vma;
int i;
i = 0;
list_for_each_entry(vma, &vm->active_list, vm_link)
i++;
- error->active_bo_count[ndx] = i;
- list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
- list_for_each_entry(vma, &obj->vma_list, obj_link)
- if (vma->vm == vm && vma->pin_count > 0)
- i++;
- }
- error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
-
- if (i) {
+ active_bo = NULL;
+ if (i)
active_bo = kcalloc(i, sizeof(*active_bo), GFP_ATOMIC);
- if (active_bo)
- pinned_bo = active_bo + error->active_bo_count[ndx];
- }
-
if (active_bo)
- error->active_bo_count[ndx] =
- capture_active_bo(active_bo,
- error->active_bo_count[ndx],
- &vm->active_list);
-
- if (pinned_bo)
- error->pinned_bo_count[ndx] =
- capture_pinned_bo(pinned_bo,
- error->pinned_bo_count[ndx],
- &dev_priv->mm.bound_list, vm);
+ i = capture_error_bo(active_bo, i, &vm->active_list, false);
+ else
+ i = 0;
+
error->active_bo[ndx] = active_bo;
- error->pinned_bo[ndx] = pinned_bo;
+ error->active_bo_count[ndx] = i;
+ error->active_vm[ndx] = vm;
}
-static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
- struct drm_i915_error_state *error)
+static void i915_capture_active_buffers(struct drm_i915_private *dev_priv,
+ struct drm_i915_error_state *error)
{
- struct i915_address_space *vm;
- int cnt = 0, i = 0;
-
- list_for_each_entry(vm, &dev_priv->vm_list, global_link)
- cnt++;
-
- error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC);
- error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC);
- error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count),
- GFP_ATOMIC);
- error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count),
- GFP_ATOMIC);
-
- if (error->active_bo == NULL ||
- error->pinned_bo == NULL ||
- error->active_bo_count == NULL ||
- error->pinned_bo_count == NULL) {
- kfree(error->active_bo);
- kfree(error->active_bo_count);
- kfree(error->pinned_bo);
- kfree(error->pinned_bo_count);
-
- error->active_bo = NULL;
- error->active_bo_count = NULL;
- error->pinned_bo = NULL;
- error->pinned_bo_count = NULL;
- } else {
- list_for_each_entry(vm, &dev_priv->vm_list, global_link)
- i915_gem_capture_vm(dev_priv, error, vm, i++);
+ int cnt = 0, i, j;
+
+ BUILD_BUG_ON(ARRAY_SIZE(error->ring) > ARRAY_SIZE(error->active_bo));
+ BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm));
+ BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count));
+
+ for (i = 0; i < I915_NUM_RINGS; i++) {
+ if (error->ring[i].vm == NULL)
+ continue;
+
+ for (j = 0; j < i; j++)
+ if (error->ring[j].vm == error->ring[i].vm)
+ break;
+ if (j != i)
+ continue;
- error->vm_count = cnt;
+ i915_gem_capture_vm(dev_priv, error, error->ring[i].vm, cnt++);
}
}
+static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
+ struct drm_i915_error_state *error)
+{
+ struct i915_address_space *vm = &dev_priv->gtt.base;
+ struct drm_i915_error_buffer *bo;
+ struct i915_vma *vma;
+ int i, j;
+
+ i = 0;
+ list_for_each_entry(vma, &vm->active_list, vm_link)
+ i++;
+
+ j = 0;
+ list_for_each_entry(vma, &vm->inactive_list, vm_link)
+ j++;
+
+ bo = NULL;
+ if (i + j)
+ bo = kcalloc(i + j, sizeof(*bo), GFP_ATOMIC);
+ if (bo == NULL)
+ return;
+
+ i = capture_error_bo(bo, i, &vm->active_list, true);
+ j = capture_error_bo(bo + i, j, &vm->inactive_list, true);
+ error->pinned_bo_count = i + j;
+ error->pinned_bo = bo;
+}
+
/* Capture all registers which don't fit into another category. */
static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error)
@@ -1326,10 +1293,12 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged,
i915_capture_gen_state(dev_priv, error);
i915_capture_reg_state(dev_priv, error);
- i915_gem_capture_buffers(dev_priv, error);
i915_gem_record_fences(dev, error);
i915_gem_record_rings(dev, error);
+ i915_capture_active_buffers(dev_priv, error);
+ i915_capture_pinned_buffers(dev_priv, error);
+
do_gettimeofday(&error->time);
error->overlay = intel_overlay_capture_error_state(dev);
--
2.7.0.rc3
More information about the Intel-gfx
mailing list