[Mesa-dev] [PATCH 2/2] i965: Avoid flushing the batch for every blorp op.

Eric Anholt eric at anholt.net
Thu Jun 27 13:14:15 PDT 2013


This brings over the batch-wrap-prevention and aperture space checking
code from the normal brw_draw.c path, so that we don't need to flush the
batch every time.

There's a risk here if the intel_emit_post_sync_nonzero_flush() call isn't
early enough in the state emit sequences -- before this change, we implicitly
had one at the batch flush before any state was emitted, so Mesa's workaround
emits didn't really matter.

Improves cairo-gl performance by 13.7733% +/- 1.74876% (n=30/32)
No statistically significant performance difference on unigine tropics
(n=10)
No statistically significant performance difference on openarena (n=755)
No statistically significant performance difference on Lightsmark (n=15,
though this may be an issue of statistical power -- looks like a ~0.3%
performance hit)
Reduces low-resolution GLB 2.7 performance by 0.604517% +/- 0.140544%
(n=132/133)
---
I've got the test system running more Lightsmark now -- the bimodal
distribution of its results was killing the stats, and I'd bumped the
power cable and it ran out of battery and died.

I'm a little mystified by the small GLB and possibly Lightsmark regressions.
My theory was the first-post-swap-batch throttling, except
that we've got about 5 batches per frame on GLB.

 src/mesa/drivers/dri/i965/brw_blorp.cpp  | 51 +++++++++++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_blorp.h    |  4 ---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 14 ---------
 src/mesa/drivers/dri/i965/gen7_blorp.cpp |  1 -
 4 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index 92bee3e..d02c660 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -21,6 +21,7 @@
  * IN THE SOFTWARE.
  */
 
+#include <errno.h>
 #include "intel_batchbuffer.h"
 #include "intel_fbo.h"
 
@@ -191,7 +192,26 @@ intel_hiz_exec(struct intel_context *intel, struct intel_mipmap_tree *mt,
 void
 brw_blorp_exec(struct intel_context *intel, const brw_blorp_params *params)
 {
-   struct brw_context *brw = brw_context(&intel->ctx);
+   struct gl_context *ctx = &intel->ctx;
+   struct brw_context *brw = brw_context(ctx);
+   uint32_t estimated_max_batch_usage = 1500;
+   bool check_aperture_failed_once = false;
+
+   /* Flush the sampler and render caches.  We definitely need to flush the
+    * sampler cache so that we get updated contents from the render cache for
+    * the glBlitFramebuffer() source.  Also, we are sometimes warned in the
+    * docs to flush the cache between reinterpretations of the same surface
+    * data with different formats, which blorp does for stencil and depth
+    * data.
+    */
+   intel_batchbuffer_emit_mi_flush(intel);
+
+retry:
+   intel_batchbuffer_require_space(intel, estimated_max_batch_usage, false);
+   intel_batchbuffer_save_state(intel);
+   drm_intel_bo *saved_bo = intel->batch.bo;
+   uint32_t saved_used = intel->batch.used;
+   uint32_t saved_state_batch_offset = intel->batch.state_batch_offset;
 
    switch (intel->gen) {
    case 6:
@@ -206,6 +226,35 @@ brw_blorp_exec(struct intel_context *intel, const brw_blorp_params *params)
       break;
    }
 
+   /* Make sure we didn't wrap the batch unintentionally, and make sure we
+    * reserved enough space that a wrap will never happen.
+    */
+   assert(intel->batch.bo == saved_bo);
+   assert((intel->batch.used - saved_used) * 4 +
+          (saved_state_batch_offset - intel->batch.state_batch_offset) <
+          estimated_max_batch_usage);
+   /* Shut up compiler warnings on release build */
+   (void)saved_bo;
+   (void)saved_used;
+   (void)saved_state_batch_offset;
+
+   /* Check if the blorp op we just did would make our batch likely to fail to
+    * map all the BOs into the GPU at batch exec time later.  If so, flush the
+    * batch and try again with nothing else in the batch.
+    */
+   if (dri_bufmgr_check_aperture_space(&intel->batch.bo, 1)) {
+      if (!check_aperture_failed_once) {
+         check_aperture_failed_once = true;
+         intel_batchbuffer_reset_to_saved(intel);
+         intel_batchbuffer_flush(intel);
+         goto retry;
+      } else {
+         int ret = intel_batchbuffer_flush(intel);
+         WARN_ONCE(ret == -ENOSPC,
+                   "i965: blorp emit exceeded available aperture space\n");
+      }
+   }
+
    if (unlikely(intel->always_flush_batch))
       intel_batchbuffer_flush(intel);
 
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index ffc27cc..a726201 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -353,10 +353,6 @@ void
 gen6_blorp_init(struct brw_context *brw);
 
 void
-gen6_blorp_emit_batch_head(struct brw_context *brw,
-                           const brw_blorp_params *params);
-
-void
 gen6_blorp_emit_state_base_address(struct brw_context *brw,
                                    const brw_blorp_params *params);
 
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 3ccd90e..de6007d 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -45,19 +45,6 @@
                              * sizeof(float))
 /** \} */
 
-void
-gen6_blorp_emit_batch_head(struct brw_context *brw,
-                           const brw_blorp_params *params)
-{
-   struct gl_context *ctx = &brw->intel.ctx;
-
-   /* To ensure that the batch contains only the resolve, flush the batch
-    * before beginning and after finishing emitting the resolve packets.
-    */
-   intel_flush(ctx);
-}
-
-
 /**
  * CMD_STATE_BASE_ADDRESS
  *
@@ -1057,7 +1044,6 @@ gen6_blorp_exec(struct intel_context *intel,
    uint32_t wm_bind_bo_offset = 0;
 
    uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
-   gen6_blorp_emit_batch_head(brw, params);
    gen6_emit_3dstate_multisample(brw, params->num_samples);
    gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false, ~0u);
    gen6_blorp_emit_state_base_address(brw, params);
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index 822f954..8e0f7b0 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -856,7 +856,6 @@ gen7_blorp_exec(struct intel_context *intel,
    uint32_t sampler_offset = 0;
 
    uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
-   gen6_blorp_emit_batch_head(brw, params);
    gen6_emit_3dstate_multisample(brw, params->num_samples);
    gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false, ~0u);
    gen6_blorp_emit_state_base_address(brw, params);
-- 
1.8.3.rc0



More information about the mesa-dev mailing list