[PATCH 44/46] drm/i915: Forgo last_fence active request tracking

Chris Wilson chris at chris-wilson.co.uk
Fri Mar 1 15:06:54 UTC 2019


We were using the last_fence to track the last request that used this
vma that might be interpreted by a fence register and forced ourselves
to wait for this request before modifying any fence register that
overlapped our vma. Due to requirement that we need to track any XY_BLT
command, linear or tiled, this in effect meant that we have to track the
vma for its active lifespan anyway, so we can forgo the explicit
last_fence tracking and just use the whole vma->active.

Another solution would be to pipeline the register updates, and would
help resolve some long running stalls for gen3 (but only gen 2 and 3!)

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gvt/gvt.h            |  2 +-
 drivers/gpu/drm/i915/i915_debugfs.c       | 24 ++++------
 drivers/gpu/drm/i915/i915_drv.c           |  4 +-
 drivers/gpu/drm/i915/i915_drv.h           |  5 +-
 drivers/gpu/drm/i915/i915_gem.c           | 58 ++---------------------
 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 47 ++++++++++++++++--
 drivers/gpu/drm/i915/i915_gem_gtt.c       |  1 -
 drivers/gpu/drm/i915/i915_gpu_error.c     |  9 ++--
 drivers/gpu/drm/i915/i915_reset.c         |  2 +-
 drivers/gpu/drm/i915/i915_vma.c           | 13 -----
 drivers/gpu/drm/i915/i915_vma.h           |  1 -
 drivers/gpu/drm/i915/intel_device_info.h  |  1 +
 12 files changed, 67 insertions(+), 100 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 8bce09de4b82..bde3791ec6ce 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -393,7 +393,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt);
 #define gvt_hidden_gmadr_end(gvt) (gvt_hidden_gmadr_base(gvt) \
 				   + gvt_hidden_sz(gvt) - 1)
 
-#define gvt_fence_sz(gvt) (gvt->dev_priv->num_fence_regs)
+#define gvt_fence_sz(gvt) (RUNTIME_INFO((gvt)->dev_priv)->num_fences)
 
 /* Aperture/GM space definitions for vGPU */
 #define vgpu_aperture_offset(vgpu)	((vgpu)->gm.low_gm_node.start)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 36c63b087ffd..48afa85220a4 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -205,9 +205,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 			}
 		}
 		if (vma->fence)
-			seq_printf(m, " , fence: %d%s",
-				   vma->fence->id,
-				   i915_active_request_isset(&vma->last_fence) ? "*" : "");
+			seq_printf(m, " , fence: %d", vma->fence->id);
 		seq_puts(m, ")");
 	}
 	if (obj->stolen)
@@ -892,20 +890,17 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 
 static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	int i, ret;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
+	struct drm_i915_private *i915 = node_to_i915(m->private);
+	int num_fences = RUNTIME_INFO(i915)->num_fences;
+	int i;
 
-	seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct i915_vma *vma = dev_priv->fence_regs[i].vma;
+	seq_printf(m, "Total fences = %d\n", num_fences);
+	rcu_read_lock();
+	for (i = 0; i < num_fences; i++) {
+		struct i915_vma *vma = READ_ONCE(i915->fence_regs[i].vma);
 
 		seq_printf(m, "Fence %d, pin count = %d, object = ",
-			   i, dev_priv->fence_regs[i].pin_count);
+			   i, i915->fence_regs[i].pin_count);
 		if (!vma)
 			seq_puts(m, "unused");
 		else
@@ -913,7 +908,6 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 		seq_putc(m, '\n');
 	}
 
-	mutex_unlock(&dev->struct_mutex);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a7f99f4b8bf8..320016f26a8a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -324,7 +324,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		value = pdev->revision;
 		break;
 	case I915_PARAM_NUM_FENCES_AVAIL:
-		value = dev_priv->num_fence_regs;
+		value = RUNTIME_INFO(dev_priv)->num_fences;
 		break;
 	case I915_PARAM_HAS_OVERLAY:
 		value = dev_priv->overlay ? 1 : 0;
@@ -1473,7 +1473,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 	intel_uncore_sanitize(dev_priv);
 
 	intel_gt_init_workarounds(dev_priv);
-	i915_gem_load_init_fences(dev_priv);
+	i915_gem_init_fences(dev_priv);
 
 	/* On the 945G/GM, the chipset reports the MSI capability on the
 	 * integrated graphics even though the support isn't actually there
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b88464ee8328..fb5d465d0e45 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1591,8 +1591,7 @@ struct drm_i915_private {
 	/* protects panel power sequencer state */
 	struct mutex pps_mutex;
 
-	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
-	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
+	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
 
 	unsigned int fsb_freq, mem_freq, is_ddr3;
 	unsigned int skl_preferred_vco_freq;
@@ -2793,7 +2792,7 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
 void i915_gem_sanitize(struct drm_i915_private *i915);
 int i915_gem_init_early(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv);
-void i915_gem_load_init_fences(struct drm_i915_private *dev_priv);
+void i915_gem_init_fences(struct drm_i915_private *dev_priv);
 int i915_gem_freeze(struct drm_i915_private *dev_priv);
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e03b2dcdf07c..ecc62efa26bc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2012,7 +2012,6 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj, *on;
-	int i;
 
 	/*
 	 * Only called during RPM suspend. All users of the userfault_list
@@ -2025,30 +2024,13 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 				 &dev_priv->mm.userfault_list, userfault_link)
 		__i915_gem_object_release_mmap(obj);
 
-	/* The fence will be lost when the device powers down. If any were
+	/*
+	 * The fence will be lost when the device powers down. If any were
 	 * in use by hardware (i.e. they are pinned), we should not be powering
 	 * down! All other fences will be reacquired by the user upon waking.
+	 *
+	 * On resume, we call i915_gem_restore_fences() to reset the registers.
 	 */
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
-
-		/* Ideally we want to assert that the fence register is not
-		 * live at this point (i.e. that no piece of code will be
-		 * trying to write through fence + GTT, as that both violates
-		 * our tracking of activity and associated locking/barriers,
-		 * but also is illegal given that the hw is powered down).
-		 *
-		 * Previously we used reg->pin_count as a "liveness" indicator.
-		 * That is not sufficient, and we need a more fine-grained
-		 * tool if we want to have a sanity check here.
-		 */
-
-		if (!reg->vma)
-			continue;
-
-		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
-		reg->dirty = true;
-	}
 }
 
 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
@@ -5036,38 +5018,6 @@ i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
 		dev_priv->gt.cleanup_engine(engine);
 }
 
-void
-i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
-{
-	int i;
-
-	if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
-	    !IS_CHERRYVIEW(dev_priv))
-		dev_priv->num_fence_regs = 32;
-	else if (INTEL_GEN(dev_priv) >= 4 ||
-		 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
-		 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
-		dev_priv->num_fence_regs = 16;
-	else
-		dev_priv->num_fence_regs = 8;
-
-	if (intel_vgpu_active(dev_priv))
-		dev_priv->num_fence_regs =
-				I915_READ(vgtif_reg(avail_rs.fence_num));
-
-	/* Initialize fence registers to zero */
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
-
-		fence->i915 = dev_priv;
-		fence->id = i;
-		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
-	}
-	i915_gem_restore_fences(dev_priv);
-
-	i915_gem_detect_bit_6_swizzle(dev_priv);
-}
-
 static void i915_gem_init__mm(struct drm_i915_private *i915)
 {
 	spin_lock_init(&i915->mm.object_stat_lock);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 65624b8e4d15..02d7ef94df5b 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -22,7 +22,9 @@
  */
 
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
+#include "i915_pvinfo.h"
 
 /**
  * DOC: fence register handling
@@ -224,16 +226,14 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 			 i915_gem_object_get_tiling(vma->obj)))
 			return -EINVAL;
 
-		ret = i915_active_request_retire(&vma->last_fence,
-					     &vma->obj->base.dev->struct_mutex);
+		ret = i915_active_wait(&vma->active);
 		if (ret)
 			return ret;
 	}
 
 	old = xchg(&fence->vma, NULL);
 	if (old) {
-		ret = i915_active_request_retire(&old->last_fence,
-					     &old->obj->base.dev->struct_mutex);
+		ret = i915_active_wait(&fence->vma->active);
 		if (ret) {
 			fence->vma = old;
 			return ret;
@@ -455,7 +455,7 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
 	int i;
 
 	rcu_read_lock(); /* keep obj alive as we dereference */
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
+	for (i = 0; i < RUNTIME_INFO(dev_priv)->num_fences; i++) {
 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
 		struct i915_vma *vma = READ_ONCE(reg->vma);
 
@@ -785,3 +785,40 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
 		i++;
 	}
 }
+
+void i915_gem_init_fences(struct drm_i915_private *dev_priv)
+{
+	int num_fences;
+	int i;
+
+	i915_gem_detect_bit_6_swizzle(dev_priv);
+
+	if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
+	    !IS_CHERRYVIEW(dev_priv))
+		num_fences = 32;
+	else if (INTEL_GEN(dev_priv) >= 4 ||
+		 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
+		 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
+		num_fences = 16;
+	else
+		num_fences = 8;
+
+	if (intel_vgpu_active(dev_priv))
+		num_fences = I915_READ(vgtif_reg(avail_rs.fence_num));
+
+	GEM_BUG_ON(num_fences > ARRAY_SIZE(dev_priv->fence_regs));
+	num_fences = min_t(int, num_fences, ARRAY_SIZE(dev_priv->fence_regs));
+
+	/* Initialize fence registers to zero */
+	for (i = 0; i < num_fences; i++) {
+		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
+
+		fence->i915 = dev_priv;
+		fence->id = i;
+		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
+
+		fence_write(fence, NULL);
+	}
+
+	RUNTIME_INFO(dev_priv)->num_fences = num_fences;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d3f80dbf13ef..6dc1acd1d4eb 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1918,7 +1918,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 		return ERR_PTR(-ENOMEM);
 
 	i915_active_init(i915, &vma->active, NULL);
-	INIT_ACTIVE_REQUEST(&vma->last_fence);
 
 	vma->vm = &ggtt->vm;
 	vma->ops = &pd_vma_ops;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index fa86c60fb56c..85e21d1ad503 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1123,19 +1123,20 @@ static u32 i915_error_generate_code(struct i915_gpu_state *error,
 static void gem_record_fences(struct i915_gpu_state *error)
 {
 	struct drm_i915_private *dev_priv = error->i915;
+	int num_fences = RUNTIME_INFO(dev_priv)->num_fences;
 	int i;
 
 	if (INTEL_GEN(dev_priv) >= 6) {
-		for (i = 0; i < dev_priv->num_fence_regs; i++)
+		for (i = 0; i < num_fences; i++)
 			error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i));
 	} else if (INTEL_GEN(dev_priv) >= 4) {
-		for (i = 0; i < dev_priv->num_fence_regs; i++)
+		for (i = 0; i < num_fences; i++)
 			error->fence[i] = I915_READ64(FENCE_REG_965_LO(i));
 	} else {
-		for (i = 0; i < dev_priv->num_fence_regs; i++)
+		for (i = 0; i < num_fences; i++)
 			error->fence[i] = I915_READ(FENCE_REG(i));
 	}
-	error->nfence = i;
+	error->nfence = num_fences;
 }
 
 static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index b59557693319..09eb1c5be798 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -643,7 +643,7 @@ static void revoke_mmaps(struct drm_i915_private *i915)
 {
 	int i;
 
-	for (i = 0; i < i915->num_fence_regs; i++) {
+	for (i = 0; i < RUNTIME_INFO(i915)->num_fences; i++) {
 		struct drm_vma_offset_node *node;
 		struct i915_vma *vma;
 		u64 vma_offset;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 757a33998bbf..f4b38c2e8533 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -134,7 +134,6 @@ vma_create(struct drm_i915_gem_object *obj,
 		return ERR_PTR(-ENOMEM);
 
 	i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
-	INIT_ACTIVE_REQUEST(&vma->last_fence);
 
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -820,8 +819,6 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(vma->fence);
 
-	GEM_BUG_ON(i915_active_request_isset(&vma->last_fence));
-
 	mutex_lock(&vma->vm->mutex);
 	list_del(&vma->vm_link);
 	mutex_unlock(&vma->vm->mutex);
@@ -960,9 +957,6 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	}
 	obj->read_domains |= I915_GEM_GPU_DOMAINS;
 
-	if (flags & EXEC_OBJECT_NEEDS_FENCE)
-		__i915_active_request_set(&vma->last_fence, rq);
-
 	export_fence(vma, rq, flags);
 	return 0;
 }
@@ -993,14 +987,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 * before we are finished).
 		 */
 		__i915_vma_pin(vma);
-
 		ret = i915_active_wait(&vma->active);
-		if (ret)
-			goto unpin;
-
-		ret = i915_active_request_retire(&vma->last_fence,
-					      &vma->vm->i915->drm.struct_mutex);
-unpin:
 		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 37f93358aa3c..b0b126a1ffbe 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -110,7 +110,6 @@ struct i915_vma {
 #define I915_VMA_GGTT_WRITE	BIT(15)
 
 	struct i915_active active;
-	struct i915_active_request last_fence;
 
 	/**
 	 * Support different GGTT views into the same object.
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 047d10bdd455..25a1bb79a95f 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -201,6 +201,7 @@ struct intel_runtime_info {
 	u8 num_scalers[I915_MAX_PIPES];
 
 	u8 num_engines;
+	u8 num_fences;
 
 	/* Slice/subslice/EU info */
 	struct sseu_dev_info sseu;
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list