<div dir="ltr">FYI: This appears to help Car Chase by around 1%. Not much, but I'll take it.<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Nov 6, 2017 at 1:38 PM, Jason Ekstrand <span dir="ltr"><<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Previously, we just had one hash set for tracking depth and render<br>
caches called brw_context::render_cache. This is less than ideal<br>
because the depth and render caches are separate and we can't track<br>
moves between the depth and the render caches. This limitation led<br>
to some unnecessary flushing around the depth cache. There are cases<br>
(mostly with BLORP) where we can end up touching a depth or stencil<br>
buffer through the render cache. To guard against this, blorp would<br>
unconditionally do a render_cache_set_check_flush on it's destination<br>
which meant that if you did any rendering (including a BLORP operation)<br>
to a given surface and then used it as a blorp destination, you would<br>
end up flushing it out of the render cache before rendering into it.<br>
<br>
Things get worse when you dig into the depth/stencil state code for<br>
regular GL draw calls. Because we may end up rendering to a depth<br>
or stencil buffer via BLORP, we did a render_cache_set_check_flush on<br>
all depth and stencil buffers in brw_emit_depthbuffer to ensure that<br>
they got flushed out of the render cache prior to using them for depth<br>
or stencil testing. However, because we also need to track dirtiness<br>
for depth and stencil so that we can implement depth and stencil<br>
texturing correctly, we were adding all depth and stencil buffers to the<br>
render cache set in brw_postdraw_set_buffers_need_<wbr>resolve. This meant<br>
that, if anything caused 3DSTATE_DEPTH_BUFFER to get re-emitted<br>
(currently _NEW_BUFFERS, BRW_NEW_BATCH, and BRW_NEW_BLORP), we would<br>
almost always do a full pipeline stall and render/depth cache flush.<br>
<br>
The root cause of both of these problems is that we can't tell the<br>
difference between the render and depth caches in our tracking. This<br>
commit splits our cache tracking into two sets, one for render and one<br>
for depth, and properly handles transitioning between the two. We still<br>
flush all the caches whenever anything needs to be flushed. The idea is<br>
that if we're going to take the hit of a flush and stall, we may as well<br>
flush everything in the hopes that we can avoid a flush by something<br>
else later.<br>
---<br>
src/mesa/drivers/dri/i965/brw_<wbr>context.h | 7 ++++++<br>
src/mesa/drivers/dri/i965/<wbr>intel_batchbuffer.c | 2 +-<br>
src/mesa/drivers/dri/i965/<wbr>intel_fbo.c | 33 ++++++++++++++-------------<br>
src/mesa/drivers/dri/i965/<wbr>intel_fbo.h | 5 +---<br>
4 files changed, 26 insertions(+), 21 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/<wbr>brw_context.h b/src/mesa/drivers/dri/i965/<wbr>brw_context.h<br>
index 3bee3e9..d141872 100644<br>
--- a/src/mesa/drivers/dri/i965/<wbr>brw_context.h<br>
+++ b/src/mesa/drivers/dri/i965/<wbr>brw_context.h<br>
@@ -688,6 +688,13 @@ struct brw_context<br>
struct set *render_cache;<br>
<br>
/**<br>
+ * Set of struct brw_bo * that have been used as a depth buffer within this<br>
+ * batchbuffer and would need flushing before being used from another cache<br>
+ * domain that isn't coherent with it (i.e. the sampler).<br>
+ */<br>
+ struct set *depth_cache;<br>
+<br>
+ /**<br>
* Number of resets observed in the system at context creation.<br>
*<br>
* This is tracked in the context so that we can determine that another<br>
diff --git a/src/mesa/drivers/dri/i965/<wbr>intel_batchbuffer.c b/src/mesa/drivers/dri/i965/<wbr>intel_batchbuffer.c<br>
index 1a366c7..33c79a2 100644<br>
--- a/src/mesa/drivers/dri/i965/<wbr>intel_batchbuffer.c<br>
+++ b/src/mesa/drivers/dri/i965/<wbr>intel_batchbuffer.c<br>
@@ -220,7 +220,7 @@ static void<br>
intel_batchbuffer_reset_and_<wbr>clear_render_cache(struct brw_context *brw)<br>
{<br>
intel_batchbuffer_reset(brw);<br>
- brw_render_cache_set_clear(<wbr>brw);<br>
+ brw_cache_sets_clear(brw);<br>
}<br>
<br>
void<br>
diff --git a/src/mesa/drivers/dri/i965/<wbr>intel_fbo.c b/src/mesa/drivers/dri/i965/<wbr>intel_fbo.c<br>
index 927f589..75c85ec 100644<br>
--- a/src/mesa/drivers/dri/i965/<wbr>intel_fbo.c<br>
+++ b/src/mesa/drivers/dri/i965/<wbr>intel_fbo.c<br>
@@ -970,19 +970,16 @@ intel_renderbuffer_move_to_<wbr>temp(struct brw_context *brw,<br>
}<br>
<br>
void<br>
-brw_render_cache_set_clear(<wbr>struct brw_context *brw)<br>
+brw_cache_sets_clear(struct brw_context *brw)<br>
{<br>
struct set_entry *entry;<br>
<br>
set_foreach(brw->render_cache, entry) {<br>
_mesa_set_remove(brw->render_<wbr>cache, entry);<br>
}<br>
-}<br>
<br>
-void<br>
-brw_render_cache_set_add_bo(<wbr>struct brw_context *brw, struct brw_bo *bo)<br>
-{<br>
- _mesa_set_add(brw->render_<wbr>cache, bo);<br>
+ set_foreach(brw->depth_cache, entry)<br>
+ _mesa_set_remove(brw->depth_<wbr>cache, entry);<br>
}<br>
<br>
/**<br>
@@ -997,14 +994,11 @@ brw_render_cache_set_add_bo(<wbr>struct brw_context *brw, struct brw_bo *bo)<br>
* necessary is flushed before another use of that BO, but for reuse from<br>
* different caches within a batchbuffer, it's all our responsibility.<br>
*/<br>
-void<br>
-brw_render_cache_set_check_<wbr>flush(struct brw_context *brw, struct brw_bo *bo)<br>
+static void<br>
+flush_depth_and_render_<wbr>caches(struct brw_context *brw, struct brw_bo *bo)<br>
{<br>
const struct gen_device_info *devinfo = &brw->screen->devinfo;<br>
<br>
- if (!_mesa_set_search(brw-><wbr>render_cache, bo))<br>
- return;<br>
-<br>
if (devinfo->gen >= 6) {<br>
brw_emit_pipe_control_flush(<wbr>brw,<br>
PIPE_CONTROL_DEPTH_CACHE_FLUSH |<br>
@@ -1018,36 +1012,41 @@ brw_render_cache_set_check_<wbr>flush(struct brw_context *brw, struct brw_bo *bo)<br>
brw_emit_mi_flush(brw);<br>
}<br>
<br>
- brw_render_cache_set_clear(<wbr>brw);<br>
+ brw_cache_sets_clear(brw);<br>
}<br>
<br>
void<br>
brw_cache_flush_for_read(<wbr>struct brw_context *brw, struct brw_bo *bo)<br>
{<br>
- brw_render_cache_set_check_<wbr>flush(brw, bo);<br>
+ if (_mesa_set_search(brw->render_<wbr>cache, bo) ||<br>
+ _mesa_set_search(brw->depth_<wbr>cache, bo))<br>
+ flush_depth_and_render_caches(<wbr>brw, bo);<br>
}<br>
<br>
void<br>
brw_cache_flush_for_render(<wbr>struct brw_context *brw, struct brw_bo *bo)<br>
{<br>
+ if (_mesa_set_search(brw->depth_<wbr>cache, bo))<br>
+ flush_depth_and_render_caches(<wbr>brw, bo);<br>
}<br>
<br>
void<br>
brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo)<br>
{<br>
- brw_render_cache_set_add_bo(<wbr>brw, bo);<br>
+ _mesa_set_add(brw->render_<wbr>cache, bo);<br>
}<br>
<br>
void<br>
brw_cache_flush_for_depth(<wbr>struct brw_context *brw, struct brw_bo *bo)<br>
{<br>
- brw_render_cache_set_check_<wbr>flush(brw, bo);<br>
+ if (_mesa_set_search(brw->render_<wbr>cache, bo))<br>
+ flush_depth_and_render_caches(<wbr>brw, bo);<br>
}<br>
<br>
void<br>
brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo)<br>
{<br>
- brw_render_cache_set_add_bo(<wbr>brw, bo);<br>
+ _mesa_set_add(brw->depth_<wbr>cache, bo);<br>
}<br>
<br>
/**<br>
@@ -1069,4 +1068,6 @@ intel_fbo_init(struct brw_context *brw)<br>
<br>
brw->render_cache = _mesa_set_create(brw, _mesa_hash_pointer,<br>
_mesa_key_pointer_equal);<br>
+ brw->depth_cache = _mesa_set_create(brw, _mesa_hash_pointer,<br>
+ _mesa_key_pointer_equal);<br>
}<br>
diff --git a/src/mesa/drivers/dri/i965/<wbr>intel_fbo.h b/src/mesa/drivers/dri/i965/<wbr>intel_fbo.h<br>
index 74f235d..af4fb8a 100644<br>
--- a/src/mesa/drivers/dri/i965/<wbr>intel_fbo.h<br>
+++ b/src/mesa/drivers/dri/i965/<wbr>intel_fbo.h<br>
@@ -229,10 +229,7 @@ void<br>
intel_renderbuffer_upsample(<wbr>struct brw_context *brw,<br>
struct intel_renderbuffer *irb);<br>
<br>
-void brw_render_cache_set_clear(<wbr>struct brw_context *brw);<br>
-void brw_render_cache_set_add_bo(<wbr>struct brw_context *brw, struct brw_bo *bo);<br>
-void brw_render_cache_set_check_<wbr>flush(struct brw_context *brw, struct brw_bo *bo);<br>
-<br>
+void brw_cache_sets_clear(struct brw_context *brw);<br>
void brw_cache_flush_for_read(<wbr>struct brw_context *brw, struct brw_bo *bo);<br>
void brw_cache_flush_for_render(<wbr>struct brw_context *brw, struct brw_bo *bo);<br>
void brw_cache_flush_for_depth(<wbr>struct brw_context *brw, struct brw_bo *bo);<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.5.0.400.gff86faf<br>
<br>
</font></span></blockquote></div><br></div>