[Intel-gfx] [PATCH] drm/i915: Only insert the mb() before updating the fence parameter
Chris Wilson
chris at chris-wilson.co.uk
Tue Oct 9 10:38:58 CEST 2012
With a fence, we only need to insert a memory barrier around the actual
fence alteration for CPU accesses through the GTT. Performing the
barrier in flush-fence was inserting unnecessary and expensive barriers
for never fenced objects.
Note removing the barriers from flush-fence, which was effectively a
barrier before every direct access through the GTT, revealed that we
where missing a barrier before the first access through the GTT. Lack of
that barrier was sufficient to cause GPU hangs.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem.c | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 05ff790..c1ef0a8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2771,9 +2771,22 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
POSTING_READ(FENCE_REG_830_0 + reg * 4);
}
+inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
+{
+ return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
+}
+
static void i915_gem_write_fence(struct drm_device *dev, int reg,
struct drm_i915_gem_object *obj)
{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ /* Ensure that all CPU reads are completed before installing a fence
+ * and all writes before removing the fence.
+ */
+ if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
+ mb();
+
switch (INTEL_INFO(dev)->gen) {
case 7:
case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
@@ -2783,6 +2796,9 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
case 2: i830_write_fence_reg(dev, reg, obj); break;
default: break;
}
+
+ if (i915_gem_object_needs_mb(obj))
+ mb();
}
static inline int fence_number(struct drm_i915_private *dev_priv,
@@ -2812,7 +2828,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
}
static int
-i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
+i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
{
if (obj->last_fenced_seqno) {
int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
@@ -2822,12 +2838,6 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
obj->last_fenced_seqno = 0;
}
- /* Ensure that all CPU reads are completed before installing a fence
- * and all writes before removing the fence.
- */
- if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
- mb();
-
obj->fenced_gpu_access = false;
return 0;
}
@@ -2838,7 +2848,7 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
int ret;
- ret = i915_gem_object_flush_fence(obj);
+ ret = i915_gem_object_wait_fence(obj);
if (ret)
return ret;
@@ -2912,7 +2922,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
* will need to serialise the write to the associated fence register?
*/
if (obj->fence_dirty) {
- ret = i915_gem_object_flush_fence(obj);
+ ret = i915_gem_object_wait_fence(obj);
if (ret)
return ret;
}
@@ -2933,7 +2943,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
if (reg->obj) {
struct drm_i915_gem_object *old = reg->obj;
- ret = i915_gem_object_flush_fence(old);
+ ret = i915_gem_object_wait_fence(old);
if (ret)
return ret;
@@ -3244,6 +3254,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
i915_gem_object_flush_cpu_write_domain(obj);
+ if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
+ mb();
+
old_write_domain = obj->base.write_domain;
old_read_domains = obj->base.read_domains;
--
1.7.10.4
More information about the Intel-gfx
mailing list