[Mesa-dev] [PATCH 39/70] i965: Refactor aperture testing and restarting

Chris Wilson chris at chris-wilson.co.uk
Fri Aug 7 13:13:43 PDT 2015


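Refactor the aperture-space test, rollback and retry logic that was
open-coded in the blorp, compute, draw and blit paths into a common
idiom: a brw_batch_begin()/brw_batch_end() pair.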

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 src/mesa/drivers/dri/i965/brw_batch.h         |  9 +++++
 src/mesa/drivers/dri/i965/brw_blorp.cpp       | 43 +++++------------------
 src/mesa/drivers/dri/i965/brw_compute.c       | 39 +++++----------------
 src/mesa/drivers/dri/i965/brw_draw.c          | 37 ++++++--------------
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 50 +++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/intel_blit.c        | 47 +++++++------------------
 6 files changed, 99 insertions(+), 126 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index 4a50e1b..bef544d 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -31,6 +31,8 @@
 extern "C" {
 #endif
 
+#include <setjmp.h>
+
 #include <intel_bufmgr.h>
 
 #include "util/list.h"
@@ -63,6 +65,9 @@ typedef struct brw_batch {
    bool needs_sol_reset;
    int gen;
 
+   jmp_buf jmpbuf;
+   bool repeat;
+   unsigned begin_count;
    bool no_batch_wrap;
 
    struct {
@@ -261,6 +266,10 @@ intel_batchbuffer_require_space(struct brw_batch *batch, GLuint sz,
       intel_batchbuffer_emit_render_ring_prelude(batch);
 }
 
+int brw_batch_begin(struct brw_batch *batch,
+                    const int sz_bytes,
+                    enum brw_gpu_ring ring);
+int brw_batch_end(struct brw_batch *batch);
 
 #ifdef __cplusplus
 }
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index 0b0cc8d..d4d5457 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -210,7 +210,9 @@ brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
 {
    struct gl_context *ctx = &brw->ctx;
    uint32_t estimated_max_batch_usage = 1500;
-   bool check_aperture_failed_once = false;
+
+   if (brw_batch_begin(&brw->batch, estimated_max_batch_usage, RENDER_RING) < 0)
+      return;
 
    /* Flush the sampler and render caches.  We definitely need to flush the
     * sampler cache so that we get updated contents from the render cache for
@@ -221,13 +223,6 @@ brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
     */
    brw_emit_mi_flush(brw);
 
-retry:
-   intel_batchbuffer_require_space(&brw->batch, estimated_max_batch_usage, RENDER_RING);
-   intel_batchbuffer_save_state(&brw->batch);
-   drm_intel_bo *saved_bo = brw->batch.bo;
-   uint32_t saved_used = USED_BATCH(&brw->batch);
-   uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;
-
    switch (brw->gen) {
    case 6:
       gen6_blorp_exec(brw, params);
@@ -240,33 +235,18 @@ retry:
       unreachable("not reached");
    }
 
-   /* Make sure we didn't wrap the batch unintentionally, and make sure we
-    * reserved enough space that a wrap will never happen.
+   /* Flush the sampler cache so any texturing from the destination is
+    * coherent.
     */
-   assert(brw->batch.bo == saved_bo);
-   assert((USED_BATCH(&brw->batch) - saved_used) * 4 +
-          (saved_state_batch_offset - brw->batch.state_batch_offset) <
-          estimated_max_batch_usage);
-   /* Shut up compiler warnings on release build */
-   (void)saved_bo;
-   (void)saved_used;
-   (void)saved_state_batch_offset;
+   brw_emit_mi_flush(brw);
 
    /* Check if the blorp op we just did would make our batch likely to fail to
     * map all the BOs into the GPU at batch exec time later.  If so, flush the
     * batch and try again with nothing else in the batch.
     */
-   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
-      if (!check_aperture_failed_once) {
-         check_aperture_failed_once = true;
-         intel_batchbuffer_reset_to_saved(&brw->batch);
-         brw_batch_flush(&brw->batch, NULL);
-         goto retry;
-      } else {
-         int ret = brw_batch_flush(&brw->batch, NULL);
-         WARN_ONCE(ret == -ENOSPC,
-                   "i965: blorp emit exceeded available aperture space\n");
-      }
+   if (brw_batch_end(&brw->batch)) {
+      WARN_ONCE(1, "i965: blorp emit exceeded available aperture space\n");
+      return;
    }
 
    brw_batch_maybe_flush(&brw->batch);
@@ -277,11 +257,6 @@ retry:
    brw->ctx.NewDriverState = ~0ull;
    brw->no_depth_or_stencil = false;
    brw->ib.type = -1;
-
-   /* Flush the sampler cache so any texturing from the destination is
-    * coherent.
-    */
-   brw_emit_mi_flush(brw);
 }
 
 brw_hiz_op_params::brw_hiz_op_params(struct intel_mipmap_tree *mt,
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index 4bff716..817d9ef 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -86,7 +86,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
 {
    struct brw_context *brw = brw_context(ctx);
    int estimated_buffer_space_needed;
-   bool fail_next = false;
+   int ret;
 
    if (!_mesa_check_conditional_render(ctx))
       return;
@@ -104,40 +104,17 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
    estimated_buffer_space_needed += 1024; /* push constants */
    estimated_buffer_space_needed += 512; /* misc. pad */
 
-   /* Flush the batch if it's approaching full, so that we don't wrap while
-    * we've got validated state that needs to be in the same batch as the
-    * primitives.
-    */
-   intel_batchbuffer_require_space(&brw->batch, estimated_buffer_space_needed,
-                                   RENDER_RING);
-   intel_batchbuffer_save_state(&brw->batch);
+   ret = brw_batch_begin(&brw->batch, estimated_buffer_space_needed,
+                         RENDER_RING);
+   if (ret < 0)
+      return;
 
- retry:
-   brw->batch.no_batch_wrap = true;
    brw_upload_compute_state(brw);
-
    brw_emit_gpgpu_walker(brw, num_groups);
 
-   brw->batch.no_batch_wrap = false;
-
-   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
-      if (!fail_next) {
-         intel_batchbuffer_reset_to_saved(&brw->batch);
-         brw_batch_flush(&brw->batch, NULL);
-         fail_next = true;
-         goto retry;
-      } else {
-         if (brw_batch_flush(&brw->batch, NULL) == -ENOSPC) {
-            static bool warned = false;
-
-            if (!warned) {
-               fprintf(stderr, "i965: Single compute shader dispatch "
-                       "exceeded available aperture space\n");
-               warned = true;
-            }
-         }
-      }
-   }
+   WARN_ONCE(brw_batch_end(&brw->batch) == -ENOSPC,
+             "i965: Single compute shader dispatch exceeded "
+             "available aperture space\n");
 
    /* Now that we know we haven't run out of aperture space, we can safely
     * reset the dirty bits.
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index c2e7bda..c2ae0fc 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -404,7 +404,6 @@ brw_try_draw_prims(struct gl_context *ctx,
 {
    struct brw_context *brw = brw_context(ctx);
    GLuint i;
-   bool fail_next = false;
 
    if (ctx->NewState)
       _mesa_update_state(ctx);
@@ -451,6 +450,7 @@ brw_try_draw_prims(struct gl_context *ctx,
    for (i = 0; i < nr_prims; i++) {
       int estimated_max_prim_size;
       const int sampler_state_size = 16;
+      int ret;
 
       estimated_max_prim_size = 512; /* batchbuffer commands */
       estimated_max_prim_size += BRW_MAX_TEX_UNIT *
@@ -463,9 +463,9 @@ brw_try_draw_prims(struct gl_context *ctx,
        * we've got validated state that needs to be in the same batch as the
        * primitives.
        */
-      intel_batchbuffer_require_space(&brw->batch,
-                                      estimated_max_prim_size, RENDER_RING);
-      intel_batchbuffer_save_state(&brw->batch);
+      ret = brw_batch_begin(&brw->batch, estimated_max_prim_size, RENDER_RING);
+      if (ret < 0)
+         break;
 
       if (brw->num_instances != prims[i].num_instances ||
           brw->basevertex != prims[i].basevertex) {
@@ -501,35 +501,18 @@ brw_try_draw_prims(struct gl_context *ctx,
       else
 	 gen6_set_prim(brw, &prims[i]);
 
-retry:
-
       /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and
        * that the state updated in the loop outside of this block is that in
-       * *_set_prim or brw_batch_flush(), which only impacts
-       * brw->ctx.NewDriverState.
+       * *_set_prim, which only impacts brw->ctx.NewDriverState.
        */
-      if (brw->ctx.NewDriverState) {
-         brw->batch.no_batch_wrap = true;
-	 brw_upload_render_state(brw);
-      }
+      if (brw->ctx.NewDriverState)
+         brw_upload_render_state(brw);
 
       brw_emit_prim(brw, &prims[i], brw->primitive);
 
-      brw->batch.no_batch_wrap = false;
-
-      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
-	 if (!fail_next) {
-            intel_batchbuffer_reset_to_saved(&brw->batch);
-            brw_batch_flush(&brw->batch, NULL);
-	    fail_next = true;
-	    goto retry;
-	 } else {
-            int ret = brw_batch_flush(&brw->batch, NULL);
-            WARN_ONCE(ret == -ENOSPC,
-                      "i965: Single primitive emit exceeded "
-                      "available aperture space\n");
-	 }
-      }
+      WARN_ONCE(brw_batch_end(&brw->batch) == -ENOSPC,
+                "i965: Single primitive emit exceeded "
+                "available aperture space\n");
 
       /* Now that we know we haven't run out of aperture space, we can safely
        * reset the dirty bits.
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 2b5acd1..49c1c27 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -321,7 +321,9 @@ brw_batch_flush(struct brw_batch *batch, struct perf_debug *info)
 
    brw->batch.reserved_space = 0;
 
+   brw->batch.begin_count++;
    brw_batch_finish_hook(&brw->batch);
+   brw->batch.begin_count--;
 
    /* Mark the end of the buffer. */
    intel_batchbuffer_emit_dword(brw, MI_BATCH_BUFFER_END);
@@ -404,3 +406,51 @@ intel_batchbuffer_data(struct brw_context *brw,
    memcpy(brw->batch.map_next, data, bytes);
    brw->batch.map_next += bytes >> 2;
 }
+
+int brw_batch_begin(struct brw_batch *batch,
+                    const int sz_bytes,
+                    enum brw_gpu_ring ring)
+{
+   if (batch->begin_count++)
+      return 0;
+
+   intel_batchbuffer_require_space(batch, sz_bytes, ring);
+   intel_batchbuffer_save_state(batch);
+
+   batch->repeat = false;
+   batch->no_batch_wrap = true;
+
+   return setjmp(batch->jmpbuf);
+}
+
+int brw_batch_end(struct brw_batch *batch)
+{
+   assert(batch->begin_count);
+   if (--batch->begin_count)
+      return 0;
+
+   batch->no_batch_wrap = false;
+
+   if (dri_bufmgr_check_aperture_space(&batch->bo, 1)) {
+      if (!batch->repeat) {
+         enum brw_gpu_ring ring = batch->ring;
+
+         intel_batchbuffer_reset_to_saved(batch);
+         brw_batch_flush(batch, NULL);
+
+         batch->begin_count++;
+         batch->no_batch_wrap = true;
+
+         batch->ring = ring;
+         if (ring == RENDER_RING)
+            intel_batchbuffer_emit_render_ring_prelude(batch);
+
+         batch->repeat = true;
+         longjmp(batch->jmpbuf, 1);
+      }
+
+      return brw_batch_flush(batch, NULL);
+   }
+
+   return 0;
+}
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 22a5c5d..b6ad969 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -521,10 +521,9 @@ intelEmitCopyBlit(struct brw_context *brw,
                   GLshort w, GLshort h,
                   GLenum logic_op)
 {
-   GLuint CMD, BR13, pass = 0;
+   GLuint CMD, BR13;
    int dst_y2 = dst_y + h;
    int dst_x2 = dst_x + w;
-   brw_bo *aper_array[3];
    bool dst_y_tiled = dst_tiling == I915_TILING_Y;
    bool src_y_tiled = src_tiling == I915_TILING_Y;
    bool use_fast_copy_blit = false;
@@ -532,25 +531,8 @@ intelEmitCopyBlit(struct brw_context *brw,
    if ((dst_y_tiled || src_y_tiled) && brw->gen < 6)
       return false;
 
-   /* do space check before going any further */
-   do {
-       aper_array[0] = brw->batch.bo;
-       aper_array[1] = dst_buffer;
-       aper_array[2] = src_buffer;
-
-       if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
-           brw_batch_flush(&brw->batch, NULL);
-           pass++;
-       } else
-           break;
-   } while (pass < 2);
-
-   if (pass >= 2)
-      return false;
-
    unsigned length = brw->gen >= 8 ? 10 : 8;
 
-   intel_batchbuffer_require_space(&brw->batch, length * 4, BLT_RING);
    DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
        __func__,
        src_buffer, src_pitch, src_offset, src_x, src_y,
@@ -661,6 +643,9 @@ intelEmitCopyBlit(struct brw_context *brw,
    assert(dst_offset + (dst_y + h - 1) * abs(dst_pitch) +
           (w * cpp) <= dst_buffer->size);
 
+   if (brw_batch_begin(&brw->batch, 60, BLT_RING) < 0)
+      return false;
+
    BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled);
    OUT_BATCH(CMD | (length - 2));
    OUT_BATCH(BR13 | (uint16_t)dst_pitch);
@@ -691,7 +676,7 @@ intelEmitCopyBlit(struct brw_context *brw,
 
    brw_emit_mi_flush(brw);
 
-   return true;
+   return brw_batch_end(&brw->batch) == 0;
 }
 
 bool
@@ -728,10 +713,6 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
        dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
 
    unsigned xy_setup_blt_length = brw->gen >= 8 ? 10 : 8;
-   intel_batchbuffer_require_space(&brw->batch,
-                                   (xy_setup_blt_length * 4) +
-                                   (3 * 4) + dwords * 4, BLT_RING);
-
    opcode = XY_SETUP_BLT_CMD;
    if (cpp == 4)
       opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
@@ -747,6 +728,9 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
    if (dst_tiling != I915_TILING_NONE)
       blit_cmd |= XY_DST_TILED;
 
+   if (brw_batch_begin(&brw->batch, 20 + dwords, BLT_RING) < 0)
+      return false;
+
    BEGIN_BATCH_BLT(xy_setup_blt_length + 3);
    OUT_BATCH(opcode | (xy_setup_blt_length - 2));
    OUT_BATCH(br13);
@@ -776,7 +760,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
 
    brw_emit_mi_flush(brw);
 
-   return true;
+   return brw_batch_end(&brw->batch) == 0;
 }
 
 /* We don't have a memmove-type blit like some other hardware, so we'll do a
@@ -854,7 +838,6 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
 {
    uint32_t BR13, CMD;
    int pitch, cpp;
-   brw_bo *aper_array[2];
 
    pitch = mt->pitch;
    cpp = mt->cpp;
@@ -872,14 +855,8 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
    }
    BR13 |= pitch;
 
-   /* do space check before going any further */
-   aper_array[0] = brw->batch.bo;
-   aper_array[1] = mt->bo;
-
-   if (drm_intel_bufmgr_check_aperture_space(aper_array,
-					     ARRAY_SIZE(aper_array)) != 0) {
-      brw_batch_flush(&brw->batch, NULL);
-   }
+   if (brw_batch_begin(&brw->batch, 20, BLT_RING) < 0)
+      return;
 
    unsigned length = brw->gen >= 8 ? 7 : 6;
    bool dst_y_tiled = mt->tiling == I915_TILING_Y;
@@ -902,4 +879,6 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
    ADVANCE_BATCH_TILED(dst_y_tiled, false);
 
    brw_emit_mi_flush(brw);
+
+   brw_batch_end(&brw->batch);
 }
-- 
2.5.0



More information about the mesa-dev mailing list