[Intel-gfx] [PATCH 07/14] drm/i915/error: Do a better job of disambiguating VMAs

Michel Thierry michel.thierry at intel.com
Tue Jul 15 18:20:44 CEST 2014


From: Ben Widawsky <benjamin.widawsky at intel.com>

Some of the original PPGTT patches in this area were unmerged, and this
left a lot of confusion in our error capture with regard to which vm/obj
we want to capture. There have been at least a couple of patches from
Chris and me to try to fix this up; so here is another shot. Nobody
running without full PPGTT is affected by this, and that is probably why
nobody has bothered to fix it yet.

Instead of using any of the global lists to find the VMAs we want to
capture, we use the union of the active and inactive lists in the
VM. This allows us to replace our capture_bo with capture_vma, and know
that all the VMAs we want to capture are valid.

I could have probably figured out a way to reuse mm_list. As we've had
bugs here before in the shrinker, I think the best way forward is to get
it working, and then optimize it later.

Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c   |  1 +
 drivers/gpu/drm/i915/i915_gem_gtt.h   |  2 ++
 drivers/gpu/drm/i915/i915_gpu_error.c | 39 ++++++++++++++++++++++-------------
 3 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5188936..0b2b982 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2115,6 +2115,7 @@ static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&vma->vma_link);
+	INIT_LIST_HEAD(&vma->pin_capture_link);
 	INIT_LIST_HEAD(&vma->mm_list);
 	INIT_LIST_HEAD(&vma->exec_list);
 	vma->vm = vm;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8d6f7c1..1d75801 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -126,6 +126,8 @@ struct i915_vma {
 
 	struct list_head vma_link; /* Link in the object's VMA list */
 
+	struct list_head pin_capture_link; /* Link in the error capture */
+
 	/** This vma's place in the batchbuffer or on the eviction list */
 	struct list_head exec_list;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index ac101cd..4ff819e 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -677,14 +677,14 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
 static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
 			     int count, struct list_head *head)
 {
-	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
 	int i = 0;
 
-	list_for_each_entry(obj, head, global_list) {
-		if (!i915_gem_obj_is_pinned(obj))
+	list_for_each_entry(vma, head, pin_capture_link) {
+		if (!i915_gem_obj_is_pinned(vma->obj))
 			continue;
 
-		capture_bo(err++, obj);
+		capture_bo(err++, vma->obj);
 		if (++i == count)
 			break;
 	}
@@ -1031,21 +1031,32 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
 				const int vm_ndx)
 {
 	struct drm_i915_error_buffer *active_bo = NULL, *pinned_bo = NULL;
-	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	int active_vma_count = 0;
+	int vma_pin_count = 0;
+	LIST_HEAD(pinned_vma);
 
-	list_for_each_entry(vma, &vm->active_list, mm_list)
+	list_for_each_entry(vma, &vm->active_list, mm_list) {
 		active_vma_count++;
+		if (vma->pin_count) {
+			vma_pin_count++;
+			list_move_tail(&vma->pin_capture_link, &pinned_vma);
+		}
+	}
 
-	error->active_bo_count[vm_ndx] = active_vma_count;
-
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
-		if (i915_gem_obj_is_pinned(obj))
-			active_vma_count++;
+	list_for_each_entry(vma, &vm->inactive_list, mm_list) {
+		/* Certain objects may be on the inactive list, but pinned, when
+		 * in the global GGTT. */
+		if (WARN_ON(!i915_is_ggtt(vm) &&
+			    vma->pin_count &&
+			    !(vma->exec_entry->flags & (1<<31)))) { /* FIXME: need the actual flag */
+			vma_pin_count++;
+			list_move_tail(&vma->pin_capture_link, &pinned_vma);
+		}
+	}
 
-	/* XXX: this is an incorrect measurement of pinned BOs */
-	error->pinned_bo_count[vm_ndx] = active_vma_count - error->active_bo_count[vm_ndx];
+	error->active_bo_count[vm_ndx] = active_vma_count;
+	error->pinned_bo_count[vm_ndx] = vma_pin_count;
 
 	if (active_vma_count) {
 		active_bo = kcalloc(active_vma_count, sizeof(*active_bo), GFP_ATOMIC);
@@ -1063,7 +1074,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
 		error->pinned_bo_count[vm_ndx] =
 			capture_pinned_bo(pinned_bo,
 					  error->pinned_bo_count[vm_ndx],
-					  &dev_priv->mm.bound_list);
+					  &pinned_vma);
 	error->active_bo[vm_ndx] = active_bo;
 	error->pinned_bo[vm_ndx] = pinned_bo;
 }
-- 
1.9.0




More information about the Intel-gfx mailing list