[Mesa-dev] [PATCH 1/2] i965: Fix render-to-texture in non-FinishRenderTexture cases.

Eric Anholt eric at anholt.net
Wed Mar 5 16:17:24 PST 2014


We've had several problems now with FinishRenderTexture not getting called
enough, and we're ready to just give up on it ever doing what we need.  In
particular, an upcoming Steam title had rendering bugs that could be fixed
by always_flush_cache=true.

Instead of hoping Mesa core can figure out when we need to flush our
caches, just track what BOs we've rendered to in a set, and when we render
from a BO in that set, emit a flush and clear the set.

There's some overhead to keeping this set, but most of that is just
hashing the pointer -- it turns out our set never even gets very large,
because cache flushes are so common (even on cairo-gl).

No statistically significant performance difference in cairo-gl (n=100),
despite spending ~.5% CPU in these set operations.

v1: (Original patch by Eric Anholt.)
v2: (Changes by Ken Graunke.)
  - Rebase forward from May 7th 2013 -> March 4th 2014.
  - Drop the FinishRenderTexture hook entirely; after rebasing the
    patch, the hook was just an empty function.
  - Move the brw_render_cache_set_clear() call from
    intel_batchbuffer_emit_flush() to brw_emit_pipe_control_flush().
    In theory, this could catch more cases where we've flushed.
  - Consider stencil as a possible texturing source.
v3: (changes by anholt):
  - Move set_clear() back to emit_mi_flush() -- it means we can drop
    more forced flushes from the code.  In the previous location, it
    wouldn't have been called when we wanted pre-gen6.
  - Move the set clear from batch init to reset -- it should be empty at
    the start of every batch, since the kernel handled any inter-batch
    flush for us.

Signed-off-by: Eric Anholt <eric at anholt.net>
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_context.c       |  4 +-
 src/mesa/drivers/dri/i965/brw_context.h       |  7 +++
 src/mesa/drivers/dri/i965/brw_draw.c          | 30 +++++++++---
 src/mesa/drivers/dri/i965/brw_misc_state.c    |  5 ++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  4 ++
 src/mesa/drivers/dri/i965/intel_fbo.c         | 66 +++++++++++++++++++--------
 src/mesa/drivers/dri/i965/intel_fbo.h         |  4 ++
 src/mesa/main/set.c                           |  6 +++
 8 files changed, 99 insertions(+), 27 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 1441b46..026b3aa 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -698,6 +698,8 @@ brwCreateContext(gl_api api,
    /* Reinitialize the context point state.  It depends on ctx->Const values. */
    _mesa_init_point(ctx);
 
+   intel_fbo_init(brw);
+
    intel_batchbuffer_init(brw);
 
    if (brw->gen >= 6) {
@@ -721,8 +723,6 @@ brwCreateContext(gl_api api,
 
    intelInitExtensions(ctx);
 
-   intel_fbo_init(brw);
-
    brw_init_surface_formats(brw);
 
    if (brw->is_g4x || brw->gen >= 5) {
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index dbb30f2..734d273 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1030,6 +1030,13 @@ struct brw_context
    drm_intel_context *hw_ctx;
 
    /**
+    * Set of drm_intel_bo * that have been rendered to within this batchbuffer
+    * and would need flushing before being used from another cache domain that
+    * isn't coherent with it (i.e. the sampler).
+    */
+   struct set *render_cache;
+
+   /**
     * Number of resets observed in the system at context creation.
     *
     * This is tracked in the context so that we can determine that another
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index bc887fe..11a0361 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -297,8 +297,8 @@ static void brw_merge_inputs( struct brw_context *brw,
 /*
  * \brief Resolve buffers before drawing.
  *
- * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each
- * enabled depth texture.
+ * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
+ * enabled depth texture, and flush the render cache for any dirty textures.
  *
  * (In the future, this will also perform MSAA resolves).
  */
@@ -314,9 +314,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
    if (depth_irb)
       intel_renderbuffer_resolve_hiz(brw, depth_irb);
 
-   /* Resolve depth buffer of each enabled depth texture, and color buffer of
-    * each fast-clear-enabled color texture.
-    */
+   /* Resolve depth buffer and render cache of each enabled texture. */
    for (int i = 0; i < ctx->Const.MaxCombinedTextureImageUnits; i++) {
       if (!ctx->Texture.Unit[i]._ReallyEnabled)
 	 continue;
@@ -325,6 +323,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
 	 continue;
       intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
       intel_miptree_resolve_color(brw, tex_obj->mt);
+      brw_render_cache_set_check_flush(brw, tex_obj->mt->region->bo);
    }
 }
 
@@ -336,6 +335,9 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
  *
  * If the color buffer is a multisample window system buffer, then
  * mark that it needs a downsample.
+ *
+ * Also mark any render targets which will be textured as needing a render
+ * cache flush.
  */
 static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
 {
@@ -345,6 +347,7 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
    struct intel_renderbuffer *front_irb = NULL;
    struct intel_renderbuffer *back_irb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
    struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
    struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];
 
    if (brw->is_front_buffer_rendering)
@@ -354,8 +357,23 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
       front_irb->need_downsample = true;
    if (back_irb)
       back_irb->need_downsample = true;
-   if (depth_irb && ctx->Depth.Mask)
+   if (depth_irb && ctx->Depth.Mask) {
       intel_renderbuffer_att_set_needs_depth_resolve(depth_att);
+      brw_render_cache_set_add_bo(brw, depth_irb->mt->region->bo);
+   }
+
+   if (ctx->Extensions.ARB_stencil_texturing &&
+       stencil_irb && ctx->Stencil._WriteEnabled) {
+      brw_render_cache_set_add_bo(brw, stencil_irb->mt->region->bo);
+   }
+
+   for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
+      struct intel_renderbuffer *irb =
+         intel_renderbuffer(fb->_ColorDrawBuffers[i]);
+
+      if (irb)
+         brw_render_cache_set_add_bo(brw, irb->mt->region->bo);
+   }
 }
 
 /* May fail if out of video memory for texture or vbo upload, or on
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index b984d47..c8fb6f3 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -552,6 +552,11 @@ brw_emit_depthbuffer(struct brw_context *brw)
       height = stencil_irb->Base.Base.Height;
    }
 
+   if (depth_mt)
+      brw_render_cache_set_check_flush(brw, depth_mt->region->bo);
+   if (stencil_mt)
+      brw_render_cache_set_check_flush(brw, stencil_mt->region->bo);
+
    brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
                                     depthbuffer_format, depth_surface_type,
                                     stencil_mt, hiz, separate_stencil,
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 98759e2..fe95342 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -30,6 +30,7 @@
 #include "intel_reg.h"
 #include "intel_bufmgr.h"
 #include "intel_buffers.h"
+#include "intel_fbo.h"
 #include "brw_context.h"
 
 static void
@@ -88,6 +89,7 @@ intel_batchbuffer_reset(struct brw_context *brw)
    brw->batch.last_bo = brw->batch.bo;
 
    intel_batchbuffer_clear_cache(brw);
+   brw_render_cache_set_clear(brw);
 
    brw->batch.bo = drm_intel_bo_alloc(brw->bufmgr, "batchbuffer",
 					BATCH_SZ, 4096);
@@ -696,6 +698,8 @@ intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
       }
       brw_emit_pipe_control_flush(brw, flags);
    }
+
+   brw_render_cache_set_clear(brw);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index d11cdb6..0f9abac 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -36,6 +36,8 @@
 #include "main/context.h"
 #include "main/teximage.h"
 #include "main/image.h"
+#include "main/hash_table.h"
+#include "main/set.h"
 
 #include "swrast/swrast.h"
 #include "drivers/common/meta.h"
@@ -600,24 +602,6 @@ intel_render_texture(struct gl_context * ctx,
 }
 
 
-/**
- * Called by Mesa when rendering to a texture is done.
- */
-static void
-intel_finish_render_texture(struct gl_context * ctx, struct gl_renderbuffer *rb)
-{
-   struct brw_context *brw = brw_context(ctx);
-
-   DBG("Finish render %s texture\n", _mesa_get_format_name(rb->Format));
-
-   /* Since we've (probably) rendered to the texture and will (likely) use
-    * it in the texture domain later on in this batchbuffer, flush the
-    * batch.  Once again, we wish for a domain tracker in libdrm to cover
-    * usage inside of a batchbuffer like GEM does in the kernel.
-    */
-   intel_batchbuffer_emit_mi_flush(brw);
-}
-
 #define fbo_incomplete(fb, ...) do {                                          \
       static GLuint msg_id = 0;                                               \
       if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) {    \
@@ -953,6 +937,49 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
    intel_miptree_release(&new_mt);
 }
 
+void
+brw_render_cache_set_clear(struct brw_context *brw)
+{
+   struct set_entry *entry;
+
+   fprintf(stderr, "clear\n");
+
+   set_foreach(brw->render_cache, entry) {
+      _mesa_set_remove(brw->render_cache, entry);
+   }
+}
+
+void
+brw_render_cache_set_add_bo(struct brw_context *brw, drm_intel_bo *bo)
+{
+   printf("    add bo %p\n", bo);
+   _mesa_set_add(brw->render_cache, _mesa_hash_pointer(bo), bo);
+}
+
+/**
+ * Emits an appropriate flush for a BO if it has been rendered to within the
+ * same batchbuffer as a read that's about to be emitted.
+ *
+ * The GPU has separate, incoherent caches for the render cache and the
+ * sampler cache, along with other caches.  Usually data in the different
+ * caches don't interact (e.g. we don't render to our driver-generated
+ * immediate constant data), but for render-to-texture in FBOs we definitely
+ * do.  When a batchbuffer is flushed, the kernel will ensure that everything
+ * necessary is flushed before another use of that BO, but for reuse from
+ * different caches within a batchbuffer, it's all our responsibility.
+ */
+void
+brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo)
+{
+   if (!_mesa_set_search(brw->render_cache, _mesa_hash_pointer(bo), bo)) {
+      printf("noflush bo %p\n", bo);
+      return;
+   }
+
+   printf("  flush bo %p\n", bo);
+   intel_batchbuffer_emit_mi_flush(brw);
+}
+
 /**
  * Do one-time context initializations related to GL_EXT_framebuffer_object.
  * Hook in device driver functions.
@@ -966,9 +993,10 @@ intel_fbo_init(struct brw_context *brw)
    dd->MapRenderbuffer = intel_map_renderbuffer;
    dd->UnmapRenderbuffer = intel_unmap_renderbuffer;
    dd->RenderTexture = intel_render_texture;
-   dd->FinishRenderTexture = intel_finish_render_texture;
    dd->ValidateFramebuffer = intel_validate_framebuffer;
    dd->BlitFramebuffer = intel_blit_framebuffer;
    dd->EGLImageTargetRenderbufferStorage =
       intel_image_target_renderbuffer_storage;
+
+   brw->render_cache = _mesa_set_create(brw, _mesa_key_pointer_equal);
 }
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h b/src/mesa/drivers/dri/i965/intel_fbo.h
index b8db7e2..fa457e2 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.h
+++ b/src/mesa/drivers/dri/i965/intel_fbo.h
@@ -237,6 +237,10 @@ void
 intel_renderbuffer_upsample(struct brw_context *brw,
                             struct intel_renderbuffer *irb);
 
+void brw_render_cache_set_clear(struct brw_context *brw);
+void brw_render_cache_set_add_bo(struct brw_context *brw, drm_intel_bo *bo);
+void brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo);
+
 unsigned
 intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples);
 
diff --git a/src/mesa/main/set.c b/src/mesa/main/set.c
index dc3550c..bbae906 100644
--- a/src/mesa/main/set.c
+++ b/src/mesa/main/set.c
@@ -199,6 +199,8 @@ set_rehash(struct set *ht, int new_size_index)
    if (table == NULL)
       return;
 
+   fprintf(stderr, "rehash %d\n", hash_sizes[ht->size_index].max_entries);
+
    old_ht = *ht;
 
    ht->table = table;
@@ -237,11 +239,15 @@ _mesa_set_add(struct set *ht, uint32_t hash, const void *key)
       set_rehash(ht, ht->size_index);
    }
 
+   fprintf(stderr, "add %d %d\n", ht->entries, ht->deleted_entries);
    hash_address = hash % ht->size;
    do {
       struct set_entry *entry = ht->table + hash_address;
       uint32_t double_hash;
 
+      fprintf(stderr, "%d %d\n", entry_is_present(entry),
+             entry_is_deleted(entry));
+
       if (!entry_is_present(entry)) {
          if (entry_is_deleted(entry))
             ht->deleted_entries--;
-- 
1.9.0



More information about the mesa-dev mailing list