[Mesa-dev] [PATCH 4/4] i965: Track the depth and render caches separately

Jason Ekstrand jason at jlekstrand.net
Mon Nov 6 21:38:57 UTC 2017


Previously, we just had one hash set for tracking depth and render
caches called brw_context::render_cache.  This is less than ideal
because the depth and render caches are separate and we can't track
moves between the depth and the render caches.  This limitation led
to some unnecessary flushing around the depth cache.  There are cases
(mostly with BLORP) where we can end up touching a depth or stencil
buffer through the render cache.  To guard against this, blorp would
unconditionally do a render_cache_set_check_flush on it's destination
which meant that if you did any rendering (including a BLORP operation)
to a given surface and then used it as a blorp destination, you would
end up flushing it out of the render cache before rendering into it.

Things get worse when you dig into the depth/stencil state code for
regular GL draw calls.  Because we may end up rendering to a depth
or stencil buffer via BLORP, we did a render_cache_set_check_flush on
all depth and stencil buffers in brw_emit_depthbuffer to ensure that
they got flushed out of the render cache prior to using them for depth
or stencil testing.  However, because we also need to track dirtiness
for depth and stencil so that we can implement depth and stencil
texturing correctly, we were adding all depth and stencil buffers to the
render cache set in brw_postdraw_set_buffers_need_resolve.  This meant
that, if anything caused 3DSTATE_DEPTH_BUFFER to get re-emitted
(currently _NEW_BUFFERS, BRW_NEW_BATCH, and BRW_NEW_BLORP), we would
almost always do a full pipeline stall and render/depth cache flush.

The root cause of both of these problems is that we can't tell the
difference between the render and depth caches in our tracking.  This
commit splits our cache tracking into two sets, one for render and one
for depth, and properly handles transitioning between the two.  We still
flush all the caches whenever anything needs to be flushed.  The idea is
that if we're going to take the hit of a flush and stall, we may as well
flush everything in the hopes that we can avoid a flush by something
else later.
---
 src/mesa/drivers/dri/i965/brw_context.h       |  7 ++++++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  2 +-
 src/mesa/drivers/dri/i965/intel_fbo.c         | 33 ++++++++++++++-------------
 src/mesa/drivers/dri/i965/intel_fbo.h         |  5 +---
 4 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 3bee3e9..d141872 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -688,6 +688,13 @@ struct brw_context
    struct set *render_cache;
 
    /**
+    * Set of struct brw_bo * that have been used as a depth buffer within this
+    * batchbuffer and would need flushing before being used from another cache
+    * domain that isn't coherent with it (i.e. the sampler).
+    */
+   struct set *depth_cache;
+
+   /**
     * Number of resets observed in the system at context creation.
     *
     * This is tracked in the context so that we can determine that another
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 1a366c7..33c79a2 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -220,7 +220,7 @@ static void
 intel_batchbuffer_reset_and_clear_render_cache(struct brw_context *brw)
 {
    intel_batchbuffer_reset(brw);
-   brw_render_cache_set_clear(brw);
+   brw_cache_sets_clear(brw);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 927f589..75c85ec 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -970,19 +970,16 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
 }
 
 void
-brw_render_cache_set_clear(struct brw_context *brw)
+brw_cache_sets_clear(struct brw_context *brw)
 {
    struct set_entry *entry;
 
    set_foreach(brw->render_cache, entry) {
       _mesa_set_remove(brw->render_cache, entry);
    }
-}
 
-void
-brw_render_cache_set_add_bo(struct brw_context *brw, struct brw_bo *bo)
-{
-   _mesa_set_add(brw->render_cache, bo);
+   set_foreach(brw->depth_cache, entry)
+      _mesa_set_remove(brw->depth_cache, entry);
 }
 
 /**
@@ -997,14 +994,11 @@ brw_render_cache_set_add_bo(struct brw_context *brw, struct brw_bo *bo)
  * necessary is flushed before another use of that BO, but for reuse from
  * different caches within a batchbuffer, it's all our responsibility.
  */
-void
-brw_render_cache_set_check_flush(struct brw_context *brw, struct brw_bo *bo)
+static void
+flush_depth_and_render_caches(struct brw_context *brw, struct brw_bo *bo)
 {
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
-   if (!_mesa_set_search(brw->render_cache, bo))
-      return;
-
    if (devinfo->gen >= 6) {
       brw_emit_pipe_control_flush(brw,
                                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
@@ -1018,36 +1012,41 @@ brw_render_cache_set_check_flush(struct brw_context *brw, struct brw_bo *bo)
       brw_emit_mi_flush(brw);
    }
 
-   brw_render_cache_set_clear(brw);
+   brw_cache_sets_clear(brw);
 }
 
 void
 brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo)
 {
-   brw_render_cache_set_check_flush(brw, bo);
+   if (_mesa_set_search(brw->render_cache, bo) ||
+       _mesa_set_search(brw->depth_cache, bo))
+      flush_depth_and_render_caches(brw, bo);
 }
 
 void
 brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo)
 {
+   if (_mesa_set_search(brw->depth_cache, bo))
+      flush_depth_and_render_caches(brw, bo);
 }
 
 void
 brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo)
 {
-   brw_render_cache_set_add_bo(brw, bo);
+   _mesa_set_add(brw->render_cache, bo);
 }
 
 void
 brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo)
 {
-   brw_render_cache_set_check_flush(brw, bo);
+   if (_mesa_set_search(brw->render_cache, bo))
+      flush_depth_and_render_caches(brw, bo);
 }
 
 void
 brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo)
 {
-   brw_render_cache_set_add_bo(brw, bo);
+   _mesa_set_add(brw->depth_cache, bo);
 }
 
 /**
@@ -1069,4 +1068,6 @@ intel_fbo_init(struct brw_context *brw)
 
    brw->render_cache = _mesa_set_create(brw, _mesa_hash_pointer,
                                         _mesa_key_pointer_equal);
+   brw->depth_cache = _mesa_set_create(brw, _mesa_hash_pointer,
+                                       _mesa_key_pointer_equal);
 }
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h b/src/mesa/drivers/dri/i965/intel_fbo.h
index 74f235d..af4fb8a 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.h
+++ b/src/mesa/drivers/dri/i965/intel_fbo.h
@@ -229,10 +229,7 @@ void
 intel_renderbuffer_upsample(struct brw_context *brw,
                             struct intel_renderbuffer *irb);
 
-void brw_render_cache_set_clear(struct brw_context *brw);
-void brw_render_cache_set_add_bo(struct brw_context *brw, struct brw_bo *bo);
-void brw_render_cache_set_check_flush(struct brw_context *brw, struct brw_bo *bo);
-
+void brw_cache_sets_clear(struct brw_context *brw);
 void brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo);
 void brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo);
 void brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo);
-- 
2.5.0.400.gff86faf



More information about the mesa-dev mailing list