[Mesa-dev] [PATCH 8/8] i965: Avoid flushing the batch for every blorp op.

Paul Berry stereotype441 at gmail.com
Wed Aug 28 09:19:16 PDT 2013


On 27 August 2013 15:21, Eric Anholt <eric at anholt.net> wrote:

> This brings over the batch-wrap-prevention and aperture space checking
> code from the normal brw_draw.c path, so that we don't need to flush the
> batch every time.
>
> There's a risk here if the intel_emit_post_sync_nonzero_flush() call isn't
> high enough up in the state emit sequences -- before, we implicitly had
> one at the batch flush before any state was emitted, so Mesa's workaround
> emits didn't really matter.
>
> Improves cairo-gl performance by 13.7733% +/- 1.74876% (n=30/32)
> Improves minecraft apitrace performance by 1.03183% +/- 0.482297% (n=90).
> Reduces low-resolution GLB 2.7 performance by 1.17553% +/- 0.432263% (n=88)
> Reduces Lightsmark performance by 3.70246% +/- 0.322432% (n=126)
> No statistically significant performance difference on unigine tropics
> (n=10)
> No statistically significant performance difference on openarena (n=755)
>
> The two apps that are hurt happen to include stalls on busy buffer
> objects, so I think this is an effect of missing out on an opportune
> flush.
> ---
>  src/mesa/drivers/dri/i965/brw_blorp.cpp  | 50
> ++++++++++++++++++++++++++++++++
>  src/mesa/drivers/dri/i965/brw_blorp.h    |  4 ---
>  src/mesa/drivers/dri/i965/gen6_blorp.cpp | 12 --------
>  src/mesa/drivers/dri/i965/gen7_blorp.cpp |  1 -
>  4 files changed, 50 insertions(+), 17 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp
> b/src/mesa/drivers/dri/i965/brw_blorp.cpp
> index 1576ff2..c566d1d 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
> @@ -21,6 +21,7 @@
>   * IN THE SOFTWARE.
>   */
>
> +#include <errno.h>
>  #include "intel_batchbuffer.h"
>  #include "intel_fbo.h"
>
> @@ -191,6 +192,26 @@ intel_hiz_exec(struct brw_context *brw, struct
> intel_mipmap_tree *mt,
>  void
>  brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
>  {
> +   struct gl_context *ctx = &brw->ctx;
> +   uint32_t estimated_max_batch_usage = 1500;
> +   bool check_aperture_failed_once = false;
> +
> +   /* Flush the sampler and render caches.  We definitely need to flush
> the
> +    * sampler cache so that we get updated contents from the render cache
> for
> +    * the glBlitFramebuffer() source.  Also, we are sometimes warned in
> the
> +    * docs to flush the cache between reinterpretations of the same
> surface
> +    * data with different formats, which blorp does for stencil and depth
> +    * data.
> +    */
> +   intel_batchbuffer_emit_mi_flush(brw);
> +
> +retry:
> +   intel_batchbuffer_require_space(brw, estimated_max_batch_usage, false);
> +   intel_batchbuffer_save_state(brw);
> +   drm_intel_bo *saved_bo = brw->batch.bo;
> +   uint32_t saved_used = brw->batch.used;
> +   uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;
> +
>     switch (brw->gen) {
>     case 6:
>        gen6_blorp_exec(brw, params);
> @@ -204,6 +225,35 @@ brw_blorp_exec(struct brw_context *brw, const
> brw_blorp_params *params)
>        break;
>     }
>
>
Would it be feasible to add an assertion here to verify that the amount of
batch space actually used by this blorp call is less than or equal to
estimated_max_batch_usage?  That would give me a lot of increased
confidence that the magic number 1500 is correct.

With the added assertion, the series is:

Reviewed-by: Paul Berry <stereotype441 at gmail.com>


> +   /* Make sure we didn't wrap the batch unintentionally, and make sure we
> +    * reserved enough space that a wrap will never happen.
> +    */
> +   assert(brw->batch.bo == saved_bo);
> +   assert((brw->batch.used - saved_used) * 4 +
> +          (saved_state_batch_offset - brw->batch.state_batch_offset) <
> +          estimated_max_batch_usage);
> +   /* Shut up compiler warnings on release build */
> +   (void)saved_bo;
> +   (void)saved_used;
> +   (void)saved_state_batch_offset;
> +
> +   /* Check if the blorp op we just did would make our batch likely to
> fail to
> +    * map all the BOs into the GPU at batch exec time later.  If so,
> flush the
> +    * batch and try again with nothing else in the batch.
> +    */
> +   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
> +      if (!check_aperture_failed_once) {
> +         check_aperture_failed_once = true;
> +         intel_batchbuffer_reset_to_saved(brw);
> +         intel_batchbuffer_flush(brw);
> +         goto retry;
> +      } else {
> +         int ret = intel_batchbuffer_flush(brw);
> +         WARN_ONCE(ret == -ENOSPC,
> +                   "i965: blorp emit exceeded available aperture
> space\n");
> +      }
> +   }
> +
>     if (unlikely(brw->always_flush_batch))
>        intel_batchbuffer_flush(brw);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h
> b/src/mesa/drivers/dri/i965/brw_blorp.h
> index dceb4fc..e03e27f 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
> @@ -370,10 +370,6 @@ void
>  gen6_blorp_init(struct brw_context *brw);
>
>  void
> -gen6_blorp_emit_batch_head(struct brw_context *brw,
> -                           const brw_blorp_params *params);
> -
> -void
>  gen6_blorp_emit_state_base_address(struct brw_context *brw,
>                                     const brw_blorp_params *params);
>
> diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
> b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
> index da523e5..87b1d2b 100644
> --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
> @@ -45,17 +45,6 @@
>                               * sizeof(float))
>  /** \} */
>
> -void
> -gen6_blorp_emit_batch_head(struct brw_context *brw,
> -                           const brw_blorp_params *params)
> -{
> -   /* To ensure that the batch contains only the resolve, flush the batch
> -    * before beginning and after finishing emitting the resolve packets.
> -    */
> -   intel_batchbuffer_flush(brw);
> -}
> -
> -
>  /**
>   * CMD_STATE_BASE_ADDRESS
>   *
> @@ -1030,7 +1019,6 @@ gen6_blorp_exec(struct brw_context *brw,
>     uint32_t wm_bind_bo_offset = 0;
>
>     uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
> -   gen6_blorp_emit_batch_head(brw, params);
>     gen6_emit_3dstate_multisample(brw, params->num_samples);
>     gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false,
> ~0u);
>     gen6_blorp_emit_state_base_address(brw, params);
> diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> index a387836..120f535 100644
> --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> @@ -824,7 +824,6 @@ gen7_blorp_exec(struct brw_context *brw,
>     uint32_t sampler_offset = 0;
>
>     uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
> -   gen6_blorp_emit_batch_head(brw, params);
>     gen6_emit_3dstate_multisample(brw, params->num_samples);
>     gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false,
> ~0u);
>     gen6_blorp_emit_state_base_address(brw, params);
> --
> 1.8.4.rc3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130828/65876d02/attachment.html>


More information about the mesa-dev mailing list