[PATCH 43/54] drm/i915: Emit pipelined fence changes

Chris Wilson <chris at chris-wilson.co.uk>
Sat Jun 30 12:50:12 UTC 2018


Many years ago, long before requests, we tried doing this. We never
quite got it right, but now with requests we have the tracking to do the
job properly!

One of the stall points for gen2/gen3 is the use of fence registers for
GPU operations. There are only a few available, and currently if we
exhaust the available fence registers we must stall the GPU between
batches. By pipelining the fence register writes, we can avoid the stall
and continuously emit the batches. The challenge is remembering to wait
for those pipelined LRIs before accessing the fence with the CPU, and
that is what our request tracking makes easy.
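
In practice that means the fence value is no longer poked into the register
by the CPU between batches, but written with an MI_LOAD_REGISTER_IMM emitted
into the same request as the batch, with the request then tracked on the
fence. A condensed sketch of the gen3 path from the patch below:

        /* write the fence value from the CS instead of via MMIO */
        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_LOAD_REGISTER_IMM(1);
        *cs++ = i915_mmio_reg_offset(FENCE_REG(fence->id));
        *cs++ = val;
        *cs++ = MI_NOOP;
        intel_ring_advance(rq, cs);

        /* remember which request carries the fence update */
        i915_gem_active_set(&fence->pipelined, rq);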

An important use case is that this lets us distinguish the non-pipelined
case and use a blocking wait to acquire a fence, which in turn allows
fine-grained locking inside e.g. i915_gem_fault().
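
Correspondingly, any path that needs to program or use the fence from the
CPU must first flush the pipelined update; a sketch of what fence_update()
below now does before falling back to the MMIO write:

        /* wait for any in-flight pipelined LRI before touching the fence */
        ret = i915_gem_active_wait(&fence->pipelined, I915_WAIT_INTERRUPTIBLE);
        if (ret)
                return ret;

        /* NULL request => immediate, non-pipelined MMIO write */
        fence_write(fence, vma, NULL);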

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |   2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  17 +-
 drivers/gpu/drm/i915/i915_gem_fence_reg.c  | 283 +++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_fence_reg.h  |   5 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c        |   2 +
 drivers/gpu/drm/i915/i915_gem_gtt.h        |   1 +
 drivers/gpu/drm/i915/i915_vma.c            |   3 -
 drivers/gpu/drm/i915/i915_vma.h            |  24 +-
 8 files changed, 264 insertions(+), 73 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c296848917d3..71465438c691 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -928,7 +928,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 		struct i915_vma *vma = ggtt->fence_regs[i].vma;
 
 		seq_printf(m, "Fence %d, pin count = %d, object = ",
-			   i, ggtt->fence_regs[i].pin_count);
+			   i, atomic_read(&ggtt->fence_regs[i].pin_count));
 		if (!vma)
 			seq_puts(m, "unused");
 		else
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 77fa86670e9b..62721ddd5bee 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -409,11 +409,12 @@ eb_pin_vma(struct i915_execbuffer *eb,
 		return false;
 
 	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
-		if (unlikely(i915_vma_pin_fence(vma))) {
+		if (unlikely(i915_vma_reserve_fence(vma))) {
 			i915_vma_unpin(vma);
 			return false;
 		}
 
+		exec_flags &= ~EXEC_OBJECT_ASYNC;
 		if (vma->fence)
 			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
@@ -426,11 +427,8 @@ static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 {
 	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
 
-	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) {
-		mutex_lock(&vma->vm->mutex);
+	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
 		__i915_vma_unpin_fence(vma);
-		mutex_unlock(&vma->vm->mutex);
-	}
 
 	__i915_vma_unpin(vma);
 }
@@ -631,12 +629,13 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 	}
 
 	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
-		err = i915_vma_pin_fence(vma);
+		err = i915_vma_reserve_fence(vma);
 		if (unlikely(err)) {
 			i915_vma_unpin(vma);
 			return err;
 		}
 
+		exec_flags &= ~EXEC_OBJECT_ASYNC;
 		if (vma->fence)
 			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
@@ -1817,6 +1816,12 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		if (flags & EXEC_OBJECT_ASYNC)
 			continue;
 
+		if (unlikely(flags & EXEC_OBJECT_NEEDS_FENCE)) {
+			err = i915_vma_emit_pipelined_fence(vma, eb->request);
+			if (err)
+				return err;
+		}
+
 		err = i915_request_await_object
 			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
 		if (err)
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index e458ae189f82..e0ded02a4b02 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -55,10 +55,9 @@
  * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
  */
 
-#define pipelined 0
-
-static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
-				 struct i915_vma *vma)
+static int i965_write_fence_reg(struct drm_i915_fence_reg *fence,
+				struct i915_vma *vma,
+				struct i915_request *pipelined)
 {
 	i915_reg_t fence_reg_lo, fence_reg_hi;
 	int fence_pitch_shift;
@@ -95,6 +94,8 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
 	if (!pipelined) {
 		struct drm_i915_private *dev_priv = fence->ggtt->vm.i915;
 
+		lockdep_assert_held(&fence->ggtt->vm.mutex);
+
 		/* To w/a incoherency with non-atomic 64-bit register updates,
 		 * we split the 64-bit update into two 32-bit writes. In order
 		 * for a partial fence not to be evaluated between writes, we
@@ -110,11 +111,30 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
 		I915_WRITE(fence_reg_hi, upper_32_bits(val));
 		I915_WRITE(fence_reg_lo, lower_32_bits(val));
 		POSTING_READ(fence_reg_lo);
+	} else {
+		u32 *cs;
+
+		cs = intel_ring_begin(pipelined, 8);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(3);
+		*cs++ = i915_mmio_reg_offset(fence_reg_lo);
+		*cs++ = 0;
+		*cs++ = i915_mmio_reg_offset(fence_reg_hi);
+		*cs++ = upper_32_bits(val);
+		*cs++ = i915_mmio_reg_offset(fence_reg_lo);
+		*cs++ = lower_32_bits(val);
+		*cs++ = MI_NOOP;
+		intel_ring_advance(pipelined, cs);
 	}
+
+	return 0;
 }
 
-static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
-				 struct i915_vma *vma)
+static int i915_write_fence_reg(struct drm_i915_fence_reg *fence,
+				struct i915_vma *vma,
+				struct i915_request *pipelined)
 {
 	u32 val;
 
@@ -148,13 +168,30 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
 		struct drm_i915_private *dev_priv = fence->ggtt->vm.i915;
 		i915_reg_t reg = FENCE_REG(fence->id);
 
+		lockdep_assert_held(&fence->ggtt->vm.mutex);
+
 		I915_WRITE(reg, val);
 		POSTING_READ(reg);
+	} else {
+		u32 *cs;
+
+		cs = intel_ring_begin(pipelined, 4);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(1);
+		*cs++ = i915_mmio_reg_offset(FENCE_REG(fence->id));
+		*cs++ = val;
+		*cs++ = MI_NOOP;
+		intel_ring_advance(pipelined, cs);
 	}
+
+	return 0;
 }
 
-static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
-				 struct i915_vma *vma)
+static int i830_write_fence_reg(struct drm_i915_fence_reg *fence,
+				struct i915_vma *vma,
+				struct i915_request *pipelined)
 {
 	u32 val;
 
@@ -180,33 +217,55 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
 		struct drm_i915_private *dev_priv = fence->ggtt->vm.i915;
 		i915_reg_t reg = FENCE_REG(fence->id);
 
+		lockdep_assert_held(&fence->ggtt->vm.mutex);
+
 		I915_WRITE(reg, val);
 		POSTING_READ(reg);
+	} else {
+		u32 *cs;
+
+		cs = intel_ring_begin(pipelined, 4);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(1);
+		*cs++ = i915_mmio_reg_offset(FENCE_REG(fence->id));
+		*cs++ = val;
+		*cs++ = MI_NOOP;
+		intel_ring_advance(pipelined, cs);
 	}
+
+	return 0;
 }
 
-static void fence_write(struct drm_i915_fence_reg *fence,
-			struct i915_vma *vma)
+static int fence_write(struct drm_i915_fence_reg *fence,
+		       struct i915_vma *vma,
+		       struct i915_request *rq)
 {
-	lockdep_assert_held(&fence->ggtt->vm.mutex);
+	int err;
 
-	/* Previous access through the fence register is marshalled by
+	/*
+	 * Previous access through the fence register is marshalled by
 	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
 	 * and explicitly managed for internal users.
 	 */
 
 	if (IS_GEN2(fence->ggtt->vm.i915))
-		i830_write_fence_reg(fence, vma);
+		err = i830_write_fence_reg(fence, vma, rq);
 	else if (IS_GEN3(fence->ggtt->vm.i915))
-		i915_write_fence_reg(fence, vma);
+		err = i915_write_fence_reg(fence, vma, rq);
 	else
-		i965_write_fence_reg(fence, vma);
+		err = i965_write_fence_reg(fence, vma, rq);
+	if (err)
+		return err;
 
-	/* Access through the fenced region afterwards is
+	/*
+	 * Access through the fenced region afterwards is
 	 * ordered by the posting reads whilst writing the registers.
 	 */
 
 	fence->dirty = false;
+	return 0;
 }
 
 static int fence_update(struct drm_i915_fence_reg *fence,
@@ -217,19 +276,16 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 
 	lockdep_assert_held(&ggtt->vm.mutex);
 
+	ret = i915_gem_active_wait(&fence->pipelined, I915_WAIT_INTERRUPTIBLE);
+	if (ret)
+		return ret;
+
 	if (vma) {
 		if (!i915_vma_is_map_and_fenceable(vma))
 			return -EINVAL;
 
-		if (WARN(!i915_gem_object_get_stride(vma->obj) ||
-			 !i915_gem_object_get_tiling(vma->obj),
-			 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
-			 i915_gem_object_get_stride(vma->obj),
-			 i915_gem_object_get_tiling(vma->obj)))
-			return -EINVAL;
-
-		ret = i915_gem_active_retire(&vma->last_fence,
-					     &vma->obj->base.dev->struct_mutex);
+		ret = i915_gem_active_wait(&vma->last_fence,
+					   I915_WAIT_INTERRUPTIBLE);
 		if (ret)
 			return ret;
 	}
@@ -237,8 +293,8 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 	if (fence->vma) {
 		struct i915_vma *old = fence->vma;
 
-		ret = i915_gem_active_retire(&old->last_fence,
-					     &old->obj->base.dev->struct_mutex);
+		ret = i915_gem_active_wait(&old->last_fence,
+					   I915_WAIT_INTERRUPTIBLE);
 		if (ret)
 			return ret;
 
@@ -263,7 +319,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 	 * to the runtime resume, see i915_gem_restore_fences().
 	 */
 	if (intel_runtime_pm_get_if_in_use(ggtt->vm.i915)) {
-		fence_write(fence, vma);
+		fence_write(fence, vma, NULL);
 		intel_runtime_pm_put(ggtt->vm.i915);
 	}
 
@@ -299,7 +355,7 @@ int i915_vma_put_fence(struct i915_vma *vma)
 		return 0;
 
 	mutex_lock(&vma->vm->mutex);
-	if (!fence->pin_count)
+	if (!atomic_read(&fence->pin_count))
 		err = fence_update(fence, NULL);
 	else
 		err = -EBUSY;
@@ -319,12 +375,12 @@ void i915_vma_revoke_fence(struct i915_vma *vma)
 	if (!fence)
 		return;
 
-	GEM_BUG_ON(fence->pin_count);
+	GEM_BUG_ON(atomic_read(&fence->pin_count));
 
 	list_move(&fence->link, &i915_vm_to_ggtt(vma->vm)->fence_list);
 	vma->fence = NULL;
 
-	fence_write(fence, NULL);
+	fence_write(fence, NULL, NULL);
 	fence->vma = NULL;
 }
 
@@ -332,22 +388,27 @@ static struct drm_i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
 {
 	struct drm_i915_fence_reg *fence;
 
+	lockdep_assert_held(&ggtt->vm.mutex);
+
 	list_for_each_entry(fence, &ggtt->fence_list, link) {
 		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
 
-		if (fence->pin_count)
+		if (atomic_read(&fence->pin_count))
 			continue;
 
 		return fence;
 	}
 
-	/* Wait for completion of pending flips which consume fences */
-	if (intel_has_pending_fb_unpin(ggtt->vm.i915))
-		return ERR_PTR(-EAGAIN);
-
 	return ERR_PTR(-EDEADLK);
 }
 
+static void __fence_unpin(struct drm_i915_fence_reg *fence)
+{
+	GEM_BUG_ON(!atomic_read(&fence->pin_count));
+	if (atomic_dec_and_test(&fence->pin_count))
+		wake_up(&fence->ggtt->fence_wq);
+}
+
 /**
  * i915_vma_pin_fence - set up fencing for a vma
  * @vma: vma to map through a fence reg
@@ -373,30 +434,57 @@ int __i915_vma_pin_fence(struct i915_vma *vma)
 	struct drm_i915_fence_reg *fence;
 	int err;
 
-	/* Note that we revoke fences on runtime suspend. Therefore the user
+	/*
+	 * Note that we revoke fences on runtime suspend. Therefore the user
 	 * must keep the device awake whilst using the fence.
 	 */
 	assert_rpm_wakelock_held(ggtt->vm.i915);
 	lockdep_assert_held(&ggtt->vm.mutex);
+	might_sleep();
 
 	/* Just update our place in the LRU if our fence is getting reused. */
-	if (vma->fence) {
-		fence = vma->fence;
+	fence = vma->fence;
+	if (fence) {
 		GEM_BUG_ON(fence->vma != vma);
-		fence->pin_count++;
+		atomic_inc(&fence->pin_count);
 		if (!fence->dirty) {
+			err = i915_gem_active_wait(&fence->pipelined,
+						   I915_WAIT_INTERRUPTIBLE);
+			if (err)
+				goto out_unpin;
+
 			list_move_tail(&fence->link, &ggtt->fence_list);
 			return 0;
 		}
 	} else if (set) {
-		fence = fence_find(ggtt);
-		if (IS_ERR(fence))
-			return PTR_ERR(fence);
+		DEFINE_WAIT(wait);
 
-		GEM_BUG_ON(fence->pin_count);
-		fence->pin_count++;
-	} else
+		do {
+			err = prepare_to_wait_event(&ggtt->fence_wq, &wait,
+						    TASK_INTERRUPTIBLE);
+
+			fence = fence_find(ggtt);
+			if (!IS_ERR(fence))
+				break;
+
+			if (err)
+				break;
+
+			mutex_unlock(&ggtt->vm.mutex);
+
+			schedule();
+
+			mutex_lock(&ggtt->vm.mutex);
+		} while (1);
+		finish_wait(&ggtt->fence_wq, &wait);
+		if (err)
+			return err;
+
+		GEM_BUG_ON(atomic_read(&fence->pin_count));
+		atomic_set(&fence->pin_count, 1);
+	} else {
 		return 0;
+	}
 
 	err = fence_update(fence, set);
 	if (err)
@@ -409,10 +497,106 @@ int __i915_vma_pin_fence(struct i915_vma *vma)
 		return 0;
 
 out_unpin:
-	fence->pin_count--;
+	__fence_unpin(fence);
 	return err;
 }
 
+void __i915_vma_unpin_fence(struct i915_vma *vma)
+{
+	GEM_BUG_ON(!vma->fence);
+	__fence_unpin(vma->fence);
+}
+
+int i915_vma_reserve_fence(struct i915_vma *vma)
+{
+	struct drm_i915_fence_reg *fence;
+
+	lockdep_assert_held(&vma->vm->mutex);
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+
+	fence = vma->fence;
+	if (!fence) {
+		GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+
+		if (!i915_gem_object_is_tiled(vma->obj))
+			return 0;
+
+		if (!i915_vma_is_map_and_fenceable(vma))
+			return -EINVAL;
+
+		fence = fence_find(i915_vm_to_ggtt(vma->vm));
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
+
+		vma->fence = fence;
+
+		if (fence->vma) {
+			GEM_BUG_ON(fence->vma->fence != fence);
+			i915_vma_revoke_mmap(fence->vma);
+			fence->vma->fence = NULL;
+		}
+		fence->vma = vma;
+		fence->dirty = true;
+	}
+
+	atomic_inc(&fence->pin_count);
+	list_move_tail(&fence->link, &fence->ggtt->fence_list);
+
+	GEM_BUG_ON(!i915_gem_object_is_tiled(vma->obj));
+	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
+	GEM_BUG_ON(vma->node.size != vma->fence_size);
+	GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_alignment));
+
+	return 0;
+}
+
+int i915_vma_emit_pipelined_fence(struct i915_vma *vma,
+				  struct i915_request *rq)
+{
+	struct drm_i915_fence_reg *fence = vma->fence;
+	struct i915_request *prev;
+	int err;
+
+	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	GEM_BUG_ON(fence && !atomic_read(&fence->pin_count));
+
+	if (!fence)
+		goto out;
+
+	prev = i915_gem_active_raw(&fence->pipelined,
+				   &rq->i915->drm.struct_mutex);
+	if (prev) {
+		err = i915_request_await_dma_fence(rq, &prev->fence);
+		if (err)
+			return err;
+	}
+
+	if (!fence->dirty)
+		goto out;
+
+	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
+
+	if (fence->vma) {
+		prev = i915_gem_active_raw(&fence->vma->last_fence,
+					   &rq->i915->drm.struct_mutex);
+		if (prev) {
+			err = i915_request_await_dma_fence(rq, &prev->fence);
+			if (err)
+				return err;
+		}
+	}
+
+	err = fence_write(fence, vma, rq);
+	if (err)
+		return err;
+
+	i915_gem_active_set(&fence->pipelined, rq);
+out:
+	i915_gem_active_set(&vma->last_fence, rq);
+	return 0;
+}
+
 /**
  * i915_reserve_fence - Reserve a fence for vGPU
  * @i915: i915 device private
@@ -433,7 +617,7 @@ i915_reserve_fence(struct drm_i915_private *i915)
 	/* Keep at least one fence available for the display engine. */
 	count = 0;
 	list_for_each_entry(fence, &ggtt->fence_list, link)
-		count += !fence->pin_count;
+		count += !atomic_read(&fence->pin_count);
 	if (count <= 1) {
 		fence = ERR_PTR(-ENOSPC);
 		goto out_unlock;
@@ -518,6 +702,7 @@ void __i915_gem_restore_fences(struct drm_i915_private *i915)
 		 */
 		if (vma && !i915_gem_object_is_tiled(vma->obj)) {
 			GEM_BUG_ON(!reg->dirty);
+			GEM_BUG_ON(atomic_read(&reg->pin_count));
 			GEM_BUG_ON(i915_vma_has_userfault(vma));
 
 			list_move(&reg->link, &ggtt->fence_list);
@@ -525,7 +710,7 @@ void __i915_gem_restore_fences(struct drm_i915_private *i915)
 			vma = NULL;
 		}
 
-		fence_write(reg, vma);
+		fence_write(reg, vma, NULL);
 		reg->vma = vma;
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
index cd50a9ec36fc..f46e403a6655 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
@@ -27,6 +27,8 @@
 
 #include <linux/list.h>
 
+#include "i915_request.h"
+
 struct sg_table;
 
 struct drm_i915_private;
@@ -42,7 +44,7 @@ struct drm_i915_fence_reg {
 	struct i915_ggtt *ggtt;
 	struct i915_vma *vma;
 
-	int pin_count;
+	atomic_t pin_count;
 	int id;
 
 	/**
@@ -54,6 +56,7 @@ struct drm_i915_fence_reg {
 	 * command (such as BLT on gen2/3), as a "fence".
 	 */
 	bool dirty;
+	struct i915_gem_active pipelined;
 };
 
 struct drm_i915_fence_reg *
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 34837f20efab..081bc18e4507 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2874,6 +2874,7 @@ static int i915_ggtt_init_fences(struct i915_ggtt *ggtt)
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&ggtt->fence_list);
+	init_waitqueue_head(&ggtt->fence_wq);
 
 	/* Initialize fence registers to zero */
 	for (i = 0; i < ggtt->num_fence_regs; i++) {
@@ -2882,6 +2883,7 @@ static int i915_ggtt_init_fences(struct i915_ggtt *ggtt)
 		fence->ggtt = ggtt;
 		fence->id = i;
 		list_add_tail(&fence->link, &ggtt->fence_list);
+		init_request_active(&fence->pipelined, NULL);
 	}
 	i915_gem_restore_fences(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 58fc315d9c0a..4562385de70a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -373,6 +373,7 @@ struct i915_ggtt {
 
 	/** LRU list of objects with fence regs on them. */
 	struct list_head fence_list;
+	struct wait_queue_head fence_wq;
 	struct drm_i915_fence_reg *fence_regs;
 	int num_fence_regs;
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 89a118764277..c33fbac03805 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -990,9 +990,6 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	}
 	obj->read_domains |= I915_GEM_GPU_DOMAINS;
 
-	if (flags & EXEC_OBJECT_NEEDS_FENCE)
-		i915_gem_active_set(&vma->last_fence, rq);
-
 	export_fence(vma, rq, flags);
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 7df156e1ca06..8946f28e90bf 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -395,14 +395,7 @@ static inline int i915_vma_pin_fence(struct i915_vma *vma)
 int __must_check i915_vma_put_fence(struct i915_vma *vma);
 void i915_vma_revoke_fence(struct i915_vma *vma);
 
-static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
-{
-	lockdep_assert_held(&vma->vm->mutex);
-	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-
-	GEM_BUG_ON(vma->fence->pin_count <= 0);
-	vma->fence->pin_count--;
-}
+void __i915_vma_unpin_fence(struct i915_vma *vma);
 
 /**
  * i915_vma_unpin_fence - unpin fencing state
@@ -415,16 +408,21 @@ static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 static inline void
 i915_vma_unpin_fence(struct i915_vma *vma)
 {
-	/* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */
-	if (vma->fence) {
-		mutex_lock(&vma->vm->mutex);
+	/*
+	 * The assumption is if the caller has a fence, the caller owns a pin
+	 * on that fence, i.e. that vma->fence cannot become NULL prior to us
+	 * releasing our pin.
+	 */
+	if (vma->fence)
 		__i915_vma_unpin_fence(vma);
-		mutex_unlock(&vma->vm->mutex);
-	}
 }
 
 void i915_vma_parked(struct drm_i915_private *i915);
 
+int __must_check i915_vma_reserve_fence(struct i915_vma *vma);
+int i915_vma_emit_pipelined_fence(struct i915_vma *vma,
+				  struct i915_request *rq);
+
 #define for_each_until(cond) if (cond) break; else
 
 /**
-- 
2.18.0


