[Mesa-stable] [PATCH] i965: Fix render-to-texture in non-FinishRenderTexture cases.

Courtney Goeltzenleuchter courtney at lunarg.com
Fri Apr 25 15:37:32 PDT 2014


From: Eric Anholt <eric at anholt.net>

We've had several problems now with FinishRenderTexture not getting called
enough, and we're ready to just give up on it ever doing what we need.  In
particular, an upcoming Steam title had rendering bugs that could be fixed
by always_flush_cache=true.

Instead of hoping Mesa core can figure out when we need to flush our
caches, just track what BOs we've rendered to in a set, and when we render
from a BO in that set, emit a flush and clear the set.

There's some overhead to keeping this set, but most of that is just
hashing the pointer -- it turns out our set never even gets very large,
because cache flushes are so common (even on cairo-gl).

No statistically significant performance difference in cairo-gl (n=100),
despite spending ~0.5% of CPU time in these set operations.

v1: (Original patch by Eric Anholt.)
v2: (Changes by Ken Graunke.)
  - Rebase forward from May 7th 2013 -> March 4th 2014.
  - Drop the FinishRenderTexture hook entirely; after rebasing the
    patch, the hook was just an empty function.
  - Move the brw_render_cache_set_clear() call from
    intel_batchbuffer_emit_flush() to brw_emit_pipe_control_flush().
    In theory, this could catch more cases where we've flushed.
  - Consider stencil as a possible texturing source.
v3: (changes by anholt):
  - Move set_clear() back to emit_mi_flush() -- it means we can drop
    more forced flushes from the code.  In the previous location, it
    wouldn't have been called when we wanted it to be on pre-gen6.
  - Move the set clear from batch init to reset -- it should be empty at
    the start of every batch, since the kernel handled any inter-batch
    flush for us.
v4: Drop the debug code in set.c that I accidentally committed.
v5: Backport to the 10.1 stable branch (remove the reference to stencil texture).

Signed-off-by: Eric Anholt <eric at anholt.net>
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
Tested-by: Dylan Baker <baker.dylan.c at gmail.com> [v2]

Conflicts:
	src/mesa/drivers/dri/i965/brw_draw.c
	src/mesa/drivers/dri/i965/intel_fbo.h
---
 src/mesa/drivers/dri/i965/brw_context.c       |  4 +-
 src/mesa/drivers/dri/i965/brw_context.h       |  7 ++++
 src/mesa/drivers/dri/i965/brw_draw.c          | 24 ++++++++---
 src/mesa/drivers/dri/i965/brw_misc_state.c    |  5 +++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  4 ++
 src/mesa/drivers/dri/i965/intel_fbo.c         | 60 ++++++++++++++++++---------
 src/mesa/drivers/dri/i965/intel_fbo.h         | 12 ++++++
 7 files changed, 89 insertions(+), 27 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index d5dccaf..3d6ee6f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -680,6 +680,8 @@ brwCreateContext(gl_api api,
    /* Reinitialize the context point state.  It depends on ctx->Const values. */
    _mesa_init_point(ctx);
 
+   intel_fbo_init(brw);
+
    intel_batchbuffer_init(brw);
 
    if (brw->gen >= 6) {
@@ -703,8 +705,6 @@ brwCreateContext(gl_api api,
 
    intelInitExtensions(ctx);
 
-   intel_fbo_init(brw);
-
    brw_init_surface_formats(brw);
 
    if (brw->is_g4x || brw->gen >= 5) {
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index afa3350..53a72c2 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1025,6 +1025,13 @@ struct brw_context
    drm_intel_context *hw_ctx;
 
    /**
+    * Set of drm_intel_bo * that have been rendered to within this batchbuffer
+    * and would need flushing before being used from another cache domain that
+    * isn't coherent with it (i.e. the sampler).
+    */
+   struct set *render_cache;
+
+   /**
     * Number of resets observed in the system at context creation.
     *
     * This is tracked in the context so that we can determine that another
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 1e018bb..09aaf41 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -306,8 +306,8 @@ static void brw_merge_inputs( struct brw_context *brw,
 /*
  * \brief Resolve buffers before drawing.
  *
- * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each
- * enabled depth texture.
+ * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
+ * enabled depth texture, and flush the render cache for any dirty textures.
  *
  * (In the future, this will also perform MSAA resolves).
  */
@@ -323,9 +323,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
    if (depth_irb)
       intel_renderbuffer_resolve_hiz(brw, depth_irb);
 
-   /* Resolve depth buffer of each enabled depth texture, and color buffer of
-    * each fast-clear-enabled color texture.
-    */
+   /* Resolve depth buffer and flush render cache of each enabled texture. */
    for (int i = 0; i < ctx->Const.MaxCombinedTextureImageUnits; i++) {
       if (!ctx->Texture.Unit[i]._ReallyEnabled)
 	 continue;
@@ -334,6 +332,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
 	 continue;
       intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
       intel_miptree_resolve_color(brw, tex_obj->mt);
+      brw_render_cache_set_check_flush(brw, tex_obj->mt->region->bo);
    }
 }
 
@@ -345,6 +344,9 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
  *
  * If the color buffer is a multisample window system buffer, then
  * mark that it needs a downsample.
+ *
+ * Also mark any render targets which will be textured as needing a render
+ * cache flush.
  */
 static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
 {
@@ -363,8 +365,18 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
       intel_renderbuffer_set_needs_downsample(front_irb);
    if (back_irb)
       intel_renderbuffer_set_needs_downsample(back_irb);
-   if (depth_irb && ctx->Depth.Mask)
+   if (depth_irb && ctx->Depth.Mask) {
       intel_renderbuffer_att_set_needs_depth_resolve(depth_att);
+      brw_render_cache_set_add_bo(brw, depth_irb->mt->region->bo);
+   }
+
+   for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
+      struct intel_renderbuffer *irb =
+         intel_renderbuffer(fb->_ColorDrawBuffers[i]);
+
+      if (irb)
+         brw_render_cache_set_add_bo(brw, irb->mt->region->bo);
+   }
 }
 
 /* May fail if out of video memory for texture or vbo upload, or on
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index ca88b94..c0b4d2e 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -583,6 +583,11 @@ brw_emit_depthbuffer(struct brw_context *brw)
       height = stencil_irb->Base.Base.Height;
    }
 
+   if (depth_mt)
+      brw_render_cache_set_check_flush(brw, depth_mt->region->bo);
+   if (stencil_mt)
+      brw_render_cache_set_check_flush(brw, stencil_mt->region->bo);
+
    brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
                                     depthbuffer_format, depth_surface_type,
                                     stencil_mt, hiz, separate_stencil,
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index fbbd527..9c177a2 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -30,6 +30,7 @@
 #include "intel_reg.h"
 #include "intel_bufmgr.h"
 #include "intel_buffers.h"
+#include "intel_fbo.h"
 #include "brw_context.h"
 
 static void
@@ -88,6 +89,7 @@ intel_batchbuffer_reset(struct brw_context *brw)
    brw->batch.last_bo = brw->batch.bo;
 
    intel_batchbuffer_clear_cache(brw);
+   brw_render_cache_set_clear(brw);
 
    brw->batch.bo = drm_intel_bo_alloc(brw->bufmgr, "batchbuffer",
 					BATCH_SZ, 4096);
@@ -660,4 +662,6 @@ intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
       }
       brw_emit_pipe_control_flush(brw, flags);
    }
+
+   brw_render_cache_set_clear(brw);
 }
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index acb5d72..1e56d3c 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -36,6 +36,8 @@
 #include "main/context.h"
 #include "main/teximage.h"
 #include "main/image.h"
+#include "main/hash_table.h"
+#include "main/set.h"
 
 #include "swrast/swrast.h"
 #include "drivers/common/meta.h"
@@ -516,24 +518,6 @@ intel_render_texture(struct gl_context * ctx,
 }
 
 
-/**
- * Called by Mesa when rendering to a texture is done.
- */
-static void
-intel_finish_render_texture(struct gl_context * ctx, struct gl_renderbuffer *rb)
-{
-   struct brw_context *brw = brw_context(ctx);
-
-   DBG("Finish render %s texture\n", _mesa_get_format_name(rb->Format));
-
-   /* Since we've (probably) rendered to the texture and will (likely) use
-    * it in the texture domain later on in this batchbuffer, flush the
-    * batch.  Once again, we wish for a domain tracker in libdrm to cover
-    * usage inside of a batchbuffer like GEM does in the kernel.
-    */
-   intel_batchbuffer_emit_mi_flush(brw);
-}
-
 #define fbo_incomplete(fb, ...) do {                                          \
       static GLuint msg_id = 0;                                               \
       if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) {    \
@@ -889,6 +873,43 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
    intel_miptree_release(&new_mt);
 }
 
+void
+brw_render_cache_set_clear(struct brw_context *brw)
+{
+   struct set_entry *entry;
+
+   set_foreach(brw->render_cache, entry) {
+      _mesa_set_remove(brw->render_cache, entry);
+   }
+}
+
+void
+brw_render_cache_set_add_bo(struct brw_context *brw, drm_intel_bo *bo)
+{
+   _mesa_set_add(brw->render_cache, _mesa_hash_pointer(bo), bo);
+}
+
+/**
+ * Emits an appropriate flush for a BO if it has been rendered to within the
+ * same batchbuffer as a read that's about to be emitted.
+ *
+ * The GPU has separate, incoherent caches for the render cache and the
+ * sampler cache, along with other caches.  Usually data in the different
+ * caches don't interact (e.g. we don't render to our driver-generated
+ * immediate constant data), but for render-to-texture in FBOs we definitely
+ * do.  When a batchbuffer is flushed, the kernel will ensure that everything
+ * necessary is flushed before another use of that BO, but for reuse from
+ * different caches within a batchbuffer, it's all our responsibility.
+ */
+void
+brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo)
+{
+   if (!_mesa_set_search(brw->render_cache, _mesa_hash_pointer(bo), bo))
+      return;
+
+   intel_batchbuffer_emit_mi_flush(brw);
+}
+
 /**
  * Do one-time context initializations related to GL_EXT_framebuffer_object.
  * Hook in device driver functions.
@@ -902,9 +923,10 @@ intel_fbo_init(struct brw_context *brw)
    dd->MapRenderbuffer = intel_map_renderbuffer;
    dd->UnmapRenderbuffer = intel_unmap_renderbuffer;
    dd->RenderTexture = intel_render_texture;
-   dd->FinishRenderTexture = intel_finish_render_texture;
    dd->ValidateFramebuffer = intel_validate_framebuffer;
    dd->BlitFramebuffer = intel_blit_framebuffer;
    dd->EGLImageTargetRenderbufferStorage =
       intel_image_target_renderbuffer_storage;
+
+   brw->render_cache = _mesa_set_create(brw, _mesa_key_pointer_equal);
 }
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h b/src/mesa/drivers/dri/i965/intel_fbo.h
index 45e2cd8..46a1793 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.h
+++ b/src/mesa/drivers/dri/i965/intel_fbo.h
@@ -195,6 +195,18 @@ void intel_renderbuffer_move_to_temp(struct brw_context *brw,
                                      struct intel_renderbuffer *irb,
                                      bool invalidate);
 
+void
+intel_renderbuffer_downsample(struct brw_context *brw,
+                              struct intel_renderbuffer *irb);
+
+void
+intel_renderbuffer_upsample(struct brw_context *brw,
+                            struct intel_renderbuffer *irb);
+
+void brw_render_cache_set_clear(struct brw_context *brw);
+void brw_render_cache_set_add_bo(struct brw_context *brw, drm_intel_bo *bo);
+void brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo);
+
 unsigned
 intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples);
 
-- 
1.8.3.2



More information about the mesa-stable mailing list