[Intel-gfx] [PATCH 42/42] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand

oscar.mateo at intel.com
Fri Jul 11 17:49:34 CEST 2014


From: Oscar Mateo <oscar.mateo at intel.com>

As with the context object, pinning the ringbuffer to the GGTT
unconditionally is harmful (it badly fragments the GGTT and can even
exhaust it).

Unfortunately, this case is also more complex than the previous one,
because we need to map and access the ringbuffer in several places
along the execbuffer path (so we cannot simply leave only the default
ringbuffer pinned, as before). On top of that, the context object
itself contains a pointer to the ringbuffer start address, which we
have to keep updated if we are going to allow the ringbuffer to move
around.
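
In short, the scheme is a reference-counted mapping: the first user
along a given path pins the ringbuffer object to the GGTT and ioremaps
it, and the last user drops the mapping and the pin again, so the
object is only pinned while somebody actually needs it. A minimal,
standalone sketch of that pattern follows (the names and the stubbed
pin/map steps are illustrative only, not the real i915 helpers):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct ringbuf {
	atomic_int map_count;	/* users that currently need the CPU mapping */
	bool pinned;		/* stand-in for "pinned to GGTT + ioremapped" */
};

/* First user pins and maps the backing object; later users share it. */
static int ringbuf_get_mapping(struct ringbuf *rb)
{
	if (atomic_fetch_add(&rb->map_count, 1) == 0) {
		/* the real code would pin to the GGTT and ioremap here */
		rb->pinned = true;
	}
	return 0;
}

/* Last user drops the mapping and the pin, so the object may move again. */
static void ringbuf_put_mapping(struct ringbuf *rb)
{
	if (atomic_fetch_sub(&rb->map_count, 1) == 1) {
		/* the real code would iounmap and unpin here */
		rb->pinned = false;
	}
}

int main(void)
{
	struct ringbuf rb = { 0 };

	ringbuf_get_mapping(&rb);	/* e.g. execbuffer starts using the ring */
	ringbuf_get_mapping(&rb);	/* nested user: no extra pin/map */
	ringbuf_put_mapping(&rb);
	ringbuf_put_mapping(&rb);	/* last user: mapping and pin dropped */

	printf("still pinned: %d\n", rb.pinned);	/* prints 0 */
	return 0;
}

In the patch itself this is what the ringbuf->unmap_count atomic and
the intel_pin_and_map_ringbuffer_obj()/intel_unpin_ringbuffer_obj()
pair implement.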

As before, this patch is probably not ready for merging yet:
- The debugfs entries that access the context backing object need to be
  checked and modified to pin the object accordingly.
- Recovery from reset probably needs to unpin some objects as well.
- Something is not right during module re-loading (occasional failure
  in tests/drv_module_reload).
- Lots of testing needed (and maybe some targeted IGTs?).

Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 115 +++++++++++++++++++++++---------
 drivers/gpu/drm/i915/intel_ringbuffer.c |  83 +++++++++++++----------
 drivers/gpu/drm/i915/intel_ringbuffer.h |   4 ++
 3 files changed, 135 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 244269b..9e00eb5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -317,7 +317,9 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
 }
 
-static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail)
+static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
+				    struct drm_i915_gem_object *ring_obj,
+				    u32 tail)
 {
 	struct page *page;
 	uint32_t *reg_state;
@@ -326,6 +328,7 @@ static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tai
 	reg_state = kmap_atomic(page);
 
 	reg_state[CTX_RING_TAIL+1] = tail;
+	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
 
 	kunmap_atomic(reg_state);
 
@@ -338,41 +341,61 @@ static int execlists_submit_context(struct intel_engine_cs *ring,
 {
 	struct intel_context *to0 = req0->ctx;
 	struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state;
+	struct intel_ringbuffer *ringbuf0 = to0->engine[ring->id].ringbuf;
 	struct drm_i915_gem_object *ctx_obj1 = NULL;
-	bool ctx0_pinned = false;
+	bool ctx0_pinned = false, rb0_pinned = false, ctx1_pinned = false;
 	int ret;
 
 	BUG_ON(!ctx_obj0);
-	if (to0 != ring->default_context && !req0->elsp_submitted) {
-		ret = i915_gem_obj_ggtt_pin(ctx_obj0, GEN8_LR_CONTEXT_ALIGN, 0);
+	if (!req0->elsp_submitted) {
+		if (to0 != ring->default_context) {
+			ret = i915_gem_obj_ggtt_pin(ctx_obj0, GEN8_LR_CONTEXT_ALIGN, 0);
+			if (ret)
+				return ret;
+			ctx0_pinned = true;
+		}
+
+		ret = i915_gem_obj_ggtt_pin(ringbuf0->obj, PAGE_SIZE, 0);
 		if (ret)
-			return ret;
-		ctx0_pinned = true;
+			goto error;
+		rb0_pinned = true;
 	}
 
-	execlists_ctx_write_tail(ctx_obj0, req0->tail);
+	execlists_update_context(ctx_obj0, ringbuf0->obj, req0->tail);
 
 	if (req1) {
 		struct intel_context *to1 = req1->ctx;
+		struct intel_ringbuffer *ringbuf1 = to1->engine[ring->id].ringbuf;
 
 		ctx_obj1 = to1->engine[ring->id].state;
 
 		BUG_ON(!ctx_obj1);
 		if (to1 != ring->default_context) {
 			ret = i915_gem_obj_ggtt_pin(ctx_obj1, GEN8_LR_CONTEXT_ALIGN, 0);
-			if (ret) {
-				if (ctx0_pinned)
-					i915_gem_object_ggtt_unpin(ctx_obj0);
-				return ret;
-			}
+			if (ret)
+				goto error;
+			ctx1_pinned = true;
 		}
 
-		execlists_ctx_write_tail(ctx_obj1, req1->tail);
+		ret = i915_gem_obj_ggtt_pin(ringbuf1->obj, PAGE_SIZE, 0);
+		if (ret)
+			goto error;
+
+		execlists_update_context(ctx_obj1, ringbuf1->obj, req1->tail);
 	}
 
 	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
 
 	return 0;
+
+error:
+	if (ctx0_pinned)
+		i915_gem_object_ggtt_unpin(ctx_obj0);
+	if (rb0_pinned)
+		i915_gem_object_ggtt_unpin(ringbuf0->obj);
+	if (ctx1_pinned)
+		i915_gem_object_ggtt_unpin(ctx_obj1);
+	return ret;
 }
 
 static void execlists_context_unqueue(struct intel_engine_cs *ring)
@@ -437,8 +460,11 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 			WARN(head_req->elsp_submitted == 0,
 					"Never submitted head request\n");
 			if (--head_req->elsp_submitted <= 0) {
+				struct intel_ringbuffer *ringbuf =
+						head_req->ctx->engine[ring->id].ringbuf;
 				if (head_req->ctx != ring->default_context)
 					i915_gem_object_ggtt_unpin(ctx_obj);
+				i915_gem_object_ggtt_unpin(ringbuf->obj);
 				list_del(&head_req->execlist_link);
 				queue_work(dev_priv->wq, &head_req->work);
 				return true;
@@ -670,9 +696,15 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
 		}
 	}
 
+	if (atomic_inc_return(&ringbuf->unmap_count) == 1) {
+		ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
+		if (ret)
+			return ret;
+	}
+
 	ret = execlists_move_to_gpu(ringbuf, vmas);
 	if (ret)
-		return ret;
+		goto out;
 
 	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
 	instp_mask = I915_EXEC_CONSTANTS_MASK;
@@ -685,24 +717,27 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
 	case I915_EXEC_CONSTANTS_ABSOLUTE:
 		if (ring != &dev_priv->ring[RCS]) {
 			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
-			return -EINVAL;
+			ret = -EINVAL;
+			goto out;
 		}
 		break;
 
 	case I915_EXEC_CONSTANTS_REL_SURFACE:
 		DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 
 	default:
 		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
 	if (ring == &dev_priv->ring[RCS] &&
 			instp_mode != dev_priv->relative_constants_mode) {
 		ret = intel_logical_ring_begin(ringbuf, 4);
 		if (ret)
-			return ret;
+			goto out;
 
 		intel_logical_ring_emit(ringbuf, MI_NOOP);
 		intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
@@ -715,17 +750,21 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
 
 	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
 		DRM_DEBUG("sol reset is gen7 only\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
 	ret = ring->emit_bb_start(ringbuf, exec_start, flags);
 	if (ret)
-		return ret;
+		goto out;
 
 	i915_gem_execbuffer_move_to_active(vmas, ring);
 	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
 
-	return 0;
+out:
+	if (atomic_dec_return(&ringbuf->unmap_count) == 0)
+		intel_unpin_ringbuffer_obj(ringbuf);
+	return ret;
 }
 
 void intel_logical_ring_stop(struct intel_engine_cs *ring)
@@ -1492,6 +1531,12 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
 	if (so.rodata == NULL)
 		return 0;
 
+	if (atomic_inc_return(&ringbuf->unmap_count) == 1) {
+		ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
+		if (ret)
+			return ret;
+	}
+
 	ret = ring->emit_bb_start(ringbuf,
 			so.ggtt_offset,
 			I915_DISPATCH_SECURE);
@@ -1503,6 +1548,8 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
 	ret = __i915_add_request(ring, file, so.obj, NULL);
 	/* intel_logical_ring_add_request moves object to inactive if it fails */
 out:
+	if (atomic_dec_return(&ringbuf->unmap_count) == 0)
+		intel_unpin_ringbuffer_obj(ringbuf);
 	i915_gem_render_state_fini(&so);
 	return ret;
 }
@@ -1554,7 +1601,13 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
 	reg_state[CTX_RING_TAIL+1] = 0;
 	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
+
+	ret = i915_gem_obj_ggtt_pin(ring_obj, PAGE_SIZE, 0);
+	if (ret)
+		goto error;
 	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
+	i915_gem_object_ggtt_unpin(ring_obj);
+
 	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
 	reg_state[CTX_RING_BUFFER_CONTROL+1] =
 			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
@@ -1607,13 +1660,14 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 		reg_state[CTX_R_PWR_CLK_STATE+1] = 0;
 	}
 
+error:
 	kunmap_atomic(reg_state);
 
 	ctx_obj->dirty = 1;
 	set_page_dirty(page);
 	i915_gem_object_unpin_pages(ctx_obj);
 
-	return 0;
+	return ret;
 }
 
 /**
@@ -1730,16 +1784,13 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 	ringbuf->space = ringbuf->size;
 	ringbuf->last_retired_head = -1;
 
-	/* TODO: For now we put this in the mappable region so that we can reuse
-	 * the existing ringbuffer code which ioremaps it. When we start
-	 * creating many contexts, this will no longer work and we must switch
-	 * to a kmapish interface.
-	 */
-	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n",
-				ring->name, ret);
-		goto error;
+	if (ringbuf->obj == NULL) {
+		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
+		if (ret) {
+			DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n",
+					ring->name, ret);
+			goto error;
+		}
 	}
 
 	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6e604c9..020588c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1513,13 +1513,42 @@ static int init_phys_status_page(struct intel_engine_cs *ring)
 	return 0;
 }
 
-void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
+void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 {
-	if (!ringbuf->obj)
-		return;
-
 	iounmap(ringbuf->virtual_start);
+	ringbuf->virtual_start = NULL;
 	i915_gem_object_ggtt_unpin(ringbuf->obj);
+}
+
+int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
+				     struct intel_ringbuffer *ringbuf)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_gem_object *obj = ringbuf->obj;
+	int ret;
+
+	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	if (ret) {
+		i915_gem_object_ggtt_unpin(obj);
+		return ret;
+	}
+
+	ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
+			i915_gem_obj_ggtt_offset(obj), ringbuf->size);
+	if (ringbuf->virtual_start == NULL) {
+		i915_gem_object_ggtt_unpin(obj);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
+{
 	drm_gem_object_unreference(&ringbuf->obj->base);
 	ringbuf->obj = NULL;
 }
@@ -1527,12 +1556,7 @@ void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 int intel_alloc_ringbuffer_obj(struct drm_device *dev,
 			       struct intel_ringbuffer *ringbuf)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_object *obj;
-	int ret;
-
-	if (ringbuf->obj)
-		return 0;
 
 	obj = NULL;
 	if (!HAS_LLC(dev))
@@ -1545,30 +1569,9 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev,
 	/* mark ring buffers as read-only from GPU side by default */
 	obj->gt_ro = 1;
 
-	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
-	if (ret)
-		goto err_unref;
-
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
-	if (ret)
-		goto err_unpin;
-
-	ringbuf->virtual_start =
-		ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
-				ringbuf->size);
-	if (ringbuf->virtual_start == NULL) {
-		ret = -EINVAL;
-		goto err_unpin;
-	}
-
 	ringbuf->obj = obj;
-	return 0;
 
-err_unpin:
-	i915_gem_object_ggtt_unpin(obj);
-err_unref:
-	drm_gem_object_unreference(&obj->base);
-	return ret;
+	return 0;
 }
 
 static int intel_init_ring_buffer(struct drm_device *dev,
@@ -1606,10 +1609,19 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 			goto error;
 	}
 
-	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
-	if (ret) {
-		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", ring->name, ret);
-		goto error;
+	if (ringbuf->obj == NULL) {
+		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
+		if (ret) {
+			DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", ring->name, ret);
+			goto error;
+		}
+
+		ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
+		if (ret) {
+			DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", ring->name, ret);
+			intel_destroy_ringbuffer_obj(ringbuf);
+			goto error;
+		}
 	}
 
 	/* Workaround an erratum on the i830 which causes a hang if
@@ -1647,6 +1659,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 	intel_stop_ring_buffer(ring);
 	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
 
+	intel_unpin_ringbuffer_obj(ringbuf);
 	intel_destroy_ringbuffer_obj(ringbuf);
 	ring->preallocated_lazy_request = NULL;
 	ring->outstanding_lazy_seqno = 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 905d1ba..feee5e1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -94,6 +94,7 @@ struct intel_ring_hangcheck {
 struct intel_ringbuffer {
 	struct drm_i915_gem_object *obj;
 	void __iomem *virtual_start;
+	atomic_t unmap_count;
 
 	struct intel_engine_cs *ring;
 	struct intel_context *ctx;
@@ -371,6 +372,9 @@ intel_write_status_page(struct intel_engine_cs *ring,
 #define I915_GEM_HWS_SCRATCH_INDEX	0x30
 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
+void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
+int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
+				     struct intel_ringbuffer *ringbuf);
 void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
 int intel_alloc_ringbuffer_obj(struct drm_device *dev,
 			       struct intel_ringbuffer *ringbuf);
-- 
1.9.0