[Mesa-dev] [PATCH 8/8] i965: Avoid flushing the batch for every blorp op.
Eric Anholt
eric at anholt.net
Tue Aug 27 15:21:17 PDT 2013
This brings over the batch-wrap-prevention and aperture space checking
code from the normal brw_draw.c path, so that we don't need to flush the
batch every time.
There's a risk here if the intel_emit_post_sync_nonzero_flush() call isn't
high enough up in the state emit sequences -- before, we implicitly had
one at the batch flush before any state was emitted, so Mesa's workaround
emits didn't really matter.
Improves cairo-gl performance by 13.7733% +/- 1.74876% (n=30/32)
Improves minecraft apitrace performance by 1.03183% +/- 0.482297% (n=90).
Reduces low-resolution GLB 2.7 performance by 1.17553% +/- 0.432263% (n=88)
Reduces Lightsmark performance by 3.70246% +/- 0.322432% (n=126)
No statistically significant performance difference on unigine tropics
(n=10)
No statistically significant performance difference on openarena (n=755)
The two apps that are hurt happen to include stalls on busy buffer
objects, so I think this is an effect of missing out on an opportune
flush.
---
src/mesa/drivers/dri/i965/brw_blorp.cpp | 50 ++++++++++++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_blorp.h | 4 ---
src/mesa/drivers/dri/i965/gen6_blorp.cpp | 12 --------
src/mesa/drivers/dri/i965/gen7_blorp.cpp | 1 -
4 files changed, 50 insertions(+), 17 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index 1576ff2..c566d1d 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -21,6 +21,7 @@
* IN THE SOFTWARE.
*/
+#include <errno.h>
#include "intel_batchbuffer.h"
#include "intel_fbo.h"
@@ -191,6 +192,26 @@ intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
void
brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
{
+ struct gl_context *ctx = &brw->ctx;
+ uint32_t estimated_max_batch_usage = 1500;
+ bool check_aperture_failed_once = false;
+
+ /* Flush the sampler and render caches. We definitely need to flush the
+ * sampler cache so that we get updated contents from the render cache for
+ * the glBlitFramebuffer() source. Also, we are sometimes warned in the
+ * docs to flush the cache between reinterpretations of the same surface
+ * data with different formats, which blorp does for stencil and depth
+ * data.
+ */
+ intel_batchbuffer_emit_mi_flush(brw);
+
+retry:
+ intel_batchbuffer_require_space(brw, estimated_max_batch_usage, false);
+ intel_batchbuffer_save_state(brw);
+ drm_intel_bo *saved_bo = brw->batch.bo;
+ uint32_t saved_used = brw->batch.used;
+ uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;
+
switch (brw->gen) {
case 6:
gen6_blorp_exec(brw, params);
@@ -204,6 +225,35 @@ brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
break;
}
+ /* Make sure we didn't wrap the batch unintentionally, and make sure we
+ * reserved enough space that a wrap will never happen.
+ */
+ assert(brw->batch.bo == saved_bo);
+ assert((brw->batch.used - saved_used) * 4 +
+ (saved_state_batch_offset - brw->batch.state_batch_offset) <
+ estimated_max_batch_usage);
+ /* Shut up compiler warnings on release build */
+ (void)saved_bo;
+ (void)saved_used;
+ (void)saved_state_batch_offset;
+
+ /* Check if the blorp op we just did would make our batch likely to fail to
+ * map all the BOs into the GPU at batch exec time later. If so, flush the
+ * batch and try again with nothing else in the batch.
+ */
+ if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
+ if (!check_aperture_failed_once) {
+ check_aperture_failed_once = true;
+ intel_batchbuffer_reset_to_saved(brw);
+ intel_batchbuffer_flush(brw);
+ goto retry;
+ } else {
+ int ret = intel_batchbuffer_flush(brw);
+ WARN_ONCE(ret == -ENOSPC,
+ "i965: blorp emit exceeded available aperture space\n");
+ }
+ }
+
if (unlikely(brw->always_flush_batch))
intel_batchbuffer_flush(brw);
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index dceb4fc..e03e27f 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -370,10 +370,6 @@ void
gen6_blorp_init(struct brw_context *brw);
void
-gen6_blorp_emit_batch_head(struct brw_context *brw,
- const brw_blorp_params *params);
-
-void
gen6_blorp_emit_state_base_address(struct brw_context *brw,
const brw_blorp_params *params);
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index da523e5..87b1d2b 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -45,17 +45,6 @@
* sizeof(float))
/** \} */
-void
-gen6_blorp_emit_batch_head(struct brw_context *brw,
- const brw_blorp_params *params)
-{
- /* To ensure that the batch contains only the resolve, flush the batch
- * before beginning and after finishing emitting the resolve packets.
- */
- intel_batchbuffer_flush(brw);
-}
-
-
/**
* CMD_STATE_BASE_ADDRESS
*
@@ -1030,7 +1019,6 @@ gen6_blorp_exec(struct brw_context *brw,
uint32_t wm_bind_bo_offset = 0;
uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
- gen6_blorp_emit_batch_head(brw, params);
gen6_emit_3dstate_multisample(brw, params->num_samples);
gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false, ~0u);
gen6_blorp_emit_state_base_address(brw, params);
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index a387836..120f535 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -824,7 +824,6 @@ gen7_blorp_exec(struct brw_context *brw,
uint32_t sampler_offset = 0;
uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
- gen6_blorp_emit_batch_head(brw, params);
gen6_emit_3dstate_multisample(brw, params->num_samples);
gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false, ~0u);
gen6_blorp_emit_state_base_address(brw, params);
--
1.8.4.rc3
More information about the mesa-dev
mailing list