[Intel-gfx] [PATCH 14/15] drm/i915: infrastructure to track pipelined fence setup

Daniel Vetter daniel.vetter at ffwll.ch
Thu Mar 11 16:58:59 CET 2010


Fenced gtt access by the cpu needs to wait for the gpu to have updated
the fence reg if the fence setup was pipelined. This patch adds the
necessary infrastructure.
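
For illustration, cpu access paths are expected to do roughly the
following with the new setup_seqno field before touching an object
through its fence (a sketch only, not part of the patch - the helper
name is made up, the actual check lives in
i915_gem_object_get_fence_reg below):

	static int
	i915_gem_object_wait_fence_setup(struct drm_gem_object *obj)
	{
		struct drm_device *dev = obj->dev;
		drm_i915_private_t *dev_priv = dev->dev_private;
		struct drm_i915_gem_object *obj_priv = obj->driver_private;
		struct drm_i915_fence_reg *reg;

		if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
			return 0;

		reg = &dev_priv->fence_regs[obj_priv->fence_reg];

		/* A non-zero setup_seqno means the fence reg write is
		 * still pending in the ring; block until that request
		 * has retired. */
		if (reg->setup_seqno != 0)
			return i915_wait_request(dev, reg->setup_seqno);

		return 0;
	}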

I'm not too fond of the code in i915_gem_retire_requests, but I think
that a new list to track fences for retiring, a la objects, is
overkill.

v2: Retire fences before requests and explain in a big comment why the
opposite order leads to setup_seqno inconsistencies. Also add a few new
BUG_ONs to check consistency (and catch the above-mentioned problem).

v3: Like for pipelined fence use, ensure that a fence doesn't outlive
its object under certain peculiar error conditions in
i915_gem_object_put_fence_reg.

v4: Add some more phat comments to explain what the changes in v3
actually fixed. Also fix a cache-coherency problem in case userspace
ever mixes fenced and non-fenced access to an object in different
batchbuffers (see the sketch below).

v5: Adapt to Sandybridge support.
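
The v4 coherency fix boils down to the check sketched below (this
mirrors the i915_gem_object_set_to_gpu_domain hunk further down;
current_execbuffer_needs_fencing is presumably introduced earlier in
this series): when an object with a dirty gpu write domain was last
used with a fence but the current batchbuffer accesses it without one,
the pending writes have to be flushed first, since fenced and
non-fenced gpu access are not guaranteed to be coherent:

	/* Object has pending fenced writes, but the upcoming batch
	 * accesses it without a fence: flush before proceeding. */
	if (obj_priv->fenced_gpu_access && obj->write_domain != 0 &&
	    !obj_priv->current_execbuffer_needs_fencing)
		flush_domains |= obj->write_domain;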

Signed-off-by: Daniel Vetter <daniel.vetter at ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h |    1 +
 drivers/gpu/drm/i915/i915_gem.c |   60 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 41718be..4b4bb83 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -129,6 +129,7 @@ struct drm_i915_master_private {
 struct drm_i915_fence_reg {
 	struct drm_gem_object *obj;
 	uint32_t last_rendering_seqno;
+	uint32_t setup_seqno;
 	struct list_head lru_list;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 93480aa..a8209df 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1798,12 +1798,23 @@ i915_gem_retire_requests(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	uint32_t seqno;
+	int i;
 
 	if (!dev_priv->hw_status_page || list_empty(&dev_priv->mm.request_list))
 		return;
 
 	seqno = i915_get_gem_seqno(dev);
 
+	/* Update pipelined fences that have been set up by the gpu. Do this
+	 * before retiring requests because that might free objects, which in
+	 * turn might lead to reg->last_rendering_seqno == 0 with
+	 * reg->setup_seqno still being a valid seqno - quite inconsistent! */
+	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
+		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
+		if (i915_seqno_passed(seqno, reg->setup_seqno))
+			reg->setup_seqno = 0;
+	}
+
 	while (!list_empty(&dev_priv->mm.request_list)) {
 		struct drm_i915_gem_request *request;
 		uint32_t retiring_seqno;
@@ -2534,6 +2545,16 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj, int pipelined)
 	if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
 		reg = &dev_priv->fence_regs[obj_priv->fence_reg];
 		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
+
+		/* Wait for the gpu to set up the fence if it was pipelined. */
+		if (!pipelined && reg->setup_seqno != 0) {
+			ret = i915_wait_request(dev, reg->setup_seqno);
+			if (ret != 0)
+				return ret;
+
+			BUG_ON(reg->setup_seqno != 0);
+		}
+
 		return 0;
 	}
 
@@ -2567,6 +2588,11 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj, int pipelined)
 
 	reg->obj = obj;
 
+	if (pipelined)
+		reg->setup_seqno = dev_priv->mm.next_gem_seqno;
+	else
+		reg->setup_seqno = 0;
+
+	/* WIP: Synchronize again for the not-yet-pipelined stuff */
 	if (pipelined && reg->last_rendering_seqno != 0) {
 		ret = i915_wait_request(dev, reg->last_rendering_seqno);
@@ -2607,6 +2633,8 @@ i915_gem_clear_fence_reg(struct drm_gem_object *obj, int pipelined)
 
 	if (pipelined)
 		goto end;
+	else
+		BUG_ON(reg->setup_seqno != 0 || reg->last_rendering_seqno != 0);
 
 	if (IS_GEN6(dev)) {
 		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
@@ -2666,10 +2694,34 @@ i915_gem_object_put_fence_reg(struct drm_gem_object *obj,
 		drm_i915_private_t *dev_priv = dev->dev_private;
 		struct drm_i915_fence_reg *reg =
 			&dev_priv->fence_regs[obj_priv->fence_reg];
+
+		/* Beware the dragons! When do_execbuf fails, but we have
+		 * already stolen this fence reg from another object, there'll
+		 * be some great inconsistencies: reg->setup_seqno is the next
+		 * seqno to be issued and reg->last_rendering_seqno refers to
+		 * the old object this fence was stolen from. That's why simply
+		 * waiting for outstanding rendering on _this_ object is not
+		 * enough. Now if fences had life-time management independent
+		 * of bo's, this wouldn't be a problem. Alas, fences don't
+		 * track the gtt space they use, so this is not possible;
+		 * therefore they have to die when their object is unbound. So
+		 * clean up this mess here. */
+		if (reg->setup_seqno != 0) {
+			ret = i915_wait_request(dev, reg->setup_seqno);
+			if (ret != 0)
+				return ret;
+		}
+
 		if (reg->last_rendering_seqno != 0) {
 			ret = i915_wait_request(dev, reg->last_rendering_seqno);
 			if (ret != 0)
 				return ret;
+			/* Second dragon: reg->last_rendering_seqno is cleared
+			 * when the corresponding object moves off the active
+			 * list. Because the object associated with this fence
+			 * has already changed, this is not going to happen, so
+			 * do it manually. */
+			reg->last_rendering_seqno = 0;
 		}
 	}
 
@@ -3164,6 +3216,14 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
 		 obj->write_domain, obj->pending_write_domain);
 #endif
 	/*
+	 * Flush the write domain if we change from fenced to non-fenced
+	 * access!
+	 */
+	if (obj_priv->fenced_gpu_access && obj->write_domain != 0
+			&& !obj_priv->current_execbuffer_needs_fencing)
+		flush_domains |= obj->write_domain;
+
+	/*
 	 * If the object isn't moving to a new write domain,
 	 * let the object stay in multiple read domains
 	 */
-- 
1.6.6.1