[PATCH 3/3] pipelined-fence

Chris Wilson <chris@chris-wilson.co.uk>
Thu Jun 15 22:49:27 UTC 2017

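Currently any change to a fence register is performed synchronously
from the CPU, which means waiting for the GPU to finish with the old
fenced region before the register can be rewritten. Instead, emit the
fence register update into the request's ring using
MI_LOAD_REGISTER_IMM, so that the update is ordered within the request
itself and access through the fence is pipelined along with it.

To do so, drop the __EXEC_OBJECT_HAS_FENCE flag and split fence
handling in execbuf into three phases: i915_vma_reserve_fence() claims
a fence register for the vma during reservation,
i915_vma_emit_pipelined_fence() writes the register update into the
ring from eb_move_to_gpu(), and i915_vma_unreserve_fence() drops the
claim again on completion or error.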

---
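For reference, the intended call sequence in execbuf — a sketch
assembled from the hunks below, with error paths abbreviated:

	/* 1. While reserving the vma (eb_reserve_vma): claim a fence register */
	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
		ret = i915_vma_reserve_fence(vma);
		if (ret)
			return ret;
	}

	/* 2. While building the request (eb_move_to_gpu): emit the update */
	if (vma->exec_entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
		ret = i915_vma_emit_pipelined_fence(vma, eb->request);
		if (ret)
			return ret;
	}

	/* 3. On completion or error (__eb_unreserve_vma): drop the claim */
	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE))
		i915_vma_unreserve_fence(vma);
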
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  27 +++--
 drivers/gpu/drm/i915/i915_gem_fence_reg.c  | 163 ++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_vma.h            |   5 +
 3 files changed, 166 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9d7fb7e68555..4a200b5b2821 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -43,10 +43,9 @@
 #define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */
 
 #define  __EXEC_OBJECT_HAS_PIN		(1<<31)
-#define  __EXEC_OBJECT_HAS_FENCE	(1<<30)
-#define  __EXEC_OBJECT_NEEDS_MAP	(1<<29)
-#define  __EXEC_OBJECT_NEEDS_BIAS	(1<<28)
-#define  __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */
+#define  __EXEC_OBJECT_NEEDS_MAP	(1<<30)
+#define  __EXEC_OBJECT_NEEDS_BIAS	(1<<29)
+#define  __EXEC_OBJECT_INTERNAL_FLAGS (0x7<<29) /* all of the above */
 
 #define BATCH_OFFSET_BIAS (256*1024)
 
@@ -115,8 +114,8 @@ static inline void
 __eb_unreserve_vma(struct i915_vma *vma,
 		   const struct drm_i915_gem_exec_object2 *entry)
 {
-	if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
-		__i915_vma_unpin_fence(vma);
+	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE))
+		i915_vma_unreserve_fence(vma);
 
 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
 		__i915_vma_unpin(vma);
@@ -128,7 +127,7 @@ eb_unreserve_vma(struct i915_vma *vma)
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 
 	__eb_unreserve_vma(vma, entry);
-	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
+	entry->flags &= ~__EXEC_OBJECT_HAS_PIN;
 }
 
 static void
@@ -812,12 +811,9 @@ eb_reserve_vma(struct i915_vma *vma,
 	entry->flags |= __EXEC_OBJECT_HAS_PIN;
 
 	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-		ret = i915_vma_pin_fence(vma);
+		ret = i915_vma_reserve_fence(vma);
 		if (ret)
 			return ret;
-
-		if (vma->fence)
-			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
 	if (entry->offset != vma->node.start) {
@@ -1107,6 +1103,12 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
 			eb->request->capture_list = capture;
 		}
 
+		if (vma->exec_entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+			ret = i915_vma_emit_pipelined_fence(vma, eb->request);
+			if (ret)
+				return ret;
+		}
+
 		if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
 			continue;
 
@@ -1286,9 +1288,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 		if (!obj->cache_dirty && gpu_write_needs_clflush(obj))
 			obj->cache_dirty = true;
 	}
-
-	if (flags & EXEC_OBJECT_NEEDS_FENCE)
-		i915_gem_active_set(&vma->last_fence, req);
 }
 
 static void eb_export_fence(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index a2806e76395d..41e19751b889 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -55,10 +55,9 @@
  * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
  */
 
-#define pipelined 0
-
-static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
-				 struct i915_vma *vma)
+static int i965_write_fence_reg(struct drm_i915_fence_reg *fence,
+				struct i915_vma *vma,
+				struct drm_i915_gem_request *pipelined)
 {
 	i915_reg_t fence_reg_lo, fence_reg_hi;
 	int fence_pitch_shift;
@@ -110,11 +109,30 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
 		I915_WRITE(fence_reg_hi, upper_32_bits(val));
 		I915_WRITE(fence_reg_lo, lower_32_bits(val));
 		POSTING_READ(fence_reg_lo);
+	} else {
+		u32 *cs;
+
+		cs = intel_ring_begin(pipelined, 8);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(3);
+		*cs++ = i915_mmio_reg_offset(fence_reg_lo);
+		*cs++ = 0;
+		*cs++ = i915_mmio_reg_offset(fence_reg_hi);
+		*cs++ = upper_32_bits(val);
+		*cs++ = i915_mmio_reg_offset(fence_reg_lo);
+		*cs++ = lower_32_bits(val);
+		*cs++ = MI_NOOP;
+		intel_ring_advance(pipelined, cs);
 	}
+
+	return 0;
 }
 
-static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
-				 struct i915_vma *vma)
+static int i915_write_fence_reg(struct drm_i915_fence_reg *fence,
+				struct i915_vma *vma,
+				struct drm_i915_gem_request *pipelined)
 {
 	u32 val;
 
@@ -150,11 +168,26 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
 
 		I915_WRITE(reg, val);
 		POSTING_READ(reg);
+	} else {
+		u32 *cs;
+
+		cs = intel_ring_begin(pipelined, 4);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(1);
+		*cs++ = i915_mmio_reg_offset(FENCE_REG(fence->id));
+		*cs++ = val;
+		*cs++ = MI_NOOP;
+		intel_ring_advance(pipelined, cs);
 	}
+
+	return 0;
 }
 
-static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
-				 struct i915_vma *vma)
+static int i830_write_fence_reg(struct drm_i915_fence_reg *fence,
+				struct i915_vma *vma,
+				struct drm_i915_gem_request *pipelined)
 {
 	u32 val;
 
@@ -182,29 +215,49 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
 
 		I915_WRITE(reg, val);
 		POSTING_READ(reg);
+	} else {
+		u32 *cs;
+
+		cs = intel_ring_begin(pipelined, 4);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(1);
+		*cs++ = i915_mmio_reg_offset(FENCE_REG(fence->id));
+		*cs++ = val;
+		*cs++ = MI_NOOP;
+		intel_ring_advance(pipelined, cs);
 	}
+
+	return 0;
 }
 
-static void fence_write(struct drm_i915_fence_reg *fence,
-			struct i915_vma *vma)
+static int fence_write(struct drm_i915_fence_reg *fence,
+		       struct i915_vma *vma,
+		       struct drm_i915_gem_request *rq)
 {
+	int err;
+
 	/* Previous access through the fence register is marshalled by
 	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
 	 * and explicitly managed for internal users.
 	 */
 
 	if (IS_GEN2(fence->i915))
-		i830_write_fence_reg(fence, vma);
+		err = i830_write_fence_reg(fence, vma, rq);
 	else if (IS_GEN3(fence->i915))
-		i915_write_fence_reg(fence, vma);
+		err = i915_write_fence_reg(fence, vma, rq);
 	else
-		i965_write_fence_reg(fence, vma);
+		err = i965_write_fence_reg(fence, vma, rq);
+	if (err)
+		return err;
 
 	/* Access through the fenced region afterwards is
 	 * ordered by the posting reads whilst writing the registers.
 	 */
 
 	fence->dirty = false;
+	return 0;
 }
 
 static int fence_update(struct drm_i915_fence_reg *fence,
@@ -253,7 +306,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 	 * to the runtime resume, see i915_gem_restore_fences().
 	 */
 	if (intel_runtime_pm_get_if_in_use(fence->i915)) {
-		fence_write(fence, vma);
+		fence_write(fence, vma, NULL);
 		intel_runtime_pm_put(fence->i915);
 	}
 
@@ -369,6 +422,86 @@ i915_vma_pin_fence(struct i915_vma *vma)
 	return err;
 }
 
+int i915_vma_reserve_fence(struct i915_vma *vma)
+{
+	struct drm_i915_fence_reg *fence;
+
+	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+
+	fence = vma->fence;
+	if (!fence && i915_gem_object_is_tiled(vma->obj)) {
+		fence = fence_find(vma->vm->i915);
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
+
+		vma->fence = fence;
+		fence->dirty = true;
+	}
+	if (fence)
+		fence->pin_count++;
+
+	return 0;
+}
+
+int i915_vma_emit_pipelined_fence(struct i915_vma *vma,
+				  struct drm_i915_gem_request *rq)
+{
+	struct drm_i915_fence_reg *fence = vma->fence;
+	struct i915_vma *set;
+	int err;
+
+	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+
+	if (!fence)
+		return 0;
+
+	if (!fence->dirty)
+		goto out;
+
+	if (fence->vma) {
+		struct drm_i915_gem_request *prev;
+
+		prev = i915_gem_active_raw(&fence->vma->last_fence,
+					   &fence->i915->drm.struct_mutex);
+		if (prev) {
+			err = i915_gem_request_await_dma_fence(rq,
+							       &prev->fence);
+			if (err)
+				return err;
+		}
+
+		i915_gem_active_set(&fence->vma->last_fence, rq);
+	}
+
+	set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
+	err = fence_write(fence, set, rq);
+	if (err)
+		return err;
+
+	fence->vma = set;
+	list_move_tail(&fence->link, &fence->i915->mm.fence_list);
+
+out:
+	i915_gem_active_set(&vma->last_fence, rq);
+	return 0;
+}
+
+void i915_vma_unreserve_fence(struct i915_vma *vma)
+{
+	struct drm_i915_fence_reg *fence;
+
+	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+
+	fence = vma->fence;
+	if (!fence)
+		return;
+
+	if (fence->vma != vma)
+		vma->fence = NULL;
+
+	fence->pin_count--;
+}
+
 /**
  * i915_gem_revoke_fences - revoke fence state
  * @dev_priv: i915 device private
@@ -422,7 +555,7 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
 			vma = NULL;
 		}
 
-		fence_write(reg, vma);
+		fence_write(reg, vma, NULL);
 		reg->vma = vma;
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index c199c295f8d8..aa0ea45181f9 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -361,5 +361,10 @@ i915_vma_unpin_fence(struct i915_vma *vma)
 		__i915_vma_unpin_fence(vma);
 }
 
+int __must_check i915_vma_reserve_fence(struct i915_vma *vma);
+int i915_vma_emit_pipelined_fence(struct i915_vma *vma,
+				  struct drm_i915_gem_request *rq);
+void i915_vma_unreserve_fence(struct i915_vma *vma);
+
 #endif
 
-- 
2.11.0