[Mesa-dev] [PATCH 37/51] i965: Refactor aperture testing and restarting
Chris Wilson
chris at chris-wilson.co.uk
Tue Jan 10 21:24:00 UTC 2017
Refactor the aperture-test, rollback, and retry logic into a common idiom: brw_batch_begin() / brw_batch_end().
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
src/mesa/drivers/dri/i965/brw_batch.h | 18 ++++++++
src/mesa/drivers/dri/i965/brw_compute.c | 36 +++-------------
src/mesa/drivers/dri/i965/brw_draw.c | 33 ++++-----------
src/mesa/drivers/dri/i965/genX_blorp_exec.c | 49 ++++------------------
src/mesa/drivers/dri/i965/intel_batchbuffer.c | 59 +++++++++++++++++++++++++++
src/mesa/drivers/dri/i965/intel_blit.c | 49 +++++++---------------
6 files changed, 112 insertions(+), 132 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index 3899f18f83..95cdbca4fd 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -31,6 +31,8 @@
extern "C" {
#endif
+#include <setjmp.h>
+
#include <intel_bufmgr.h>
#include "util/list.h"
@@ -64,6 +66,9 @@ typedef struct brw_batch {
bool state_base_address_emitted;
int gen;
+ jmp_buf jmpbuf;
+ bool repeat;
+ unsigned begin_count;
bool no_batch_wrap;
struct {
@@ -275,6 +280,19 @@ intel_batchbuffer_space(struct brw_batch *batch)
- USED_BATCH(batch)*4;
}
+/* Out-of-line part of brw_batch_begin(); call it only through the macro. */
+int __brw_batch_begin(struct brw_batch *batch,
+ const int sz_bytes,
+ enum brw_gpu_ring ring);
+
+/*
+ * Open a batch section that brw_batch_end() may roll back and replay.
+ *
+ * Only the outermost call (begin_count going 0 -> 1) does any work: it calls
+ * __brw_batch_begin() and then records the replay point with setjmp().
+ * Nested calls just bump begin_count and evaluate to 0.
+ *
+ * Evaluates to:
+ *   < 0  __brw_batch_begin() failed; the section is NOT open, begin_count
+ *        has been restored, and brw_batch_end() must not be called;
+ *     0  first entry;
+ *     1  re-entered via the longjmp() in brw_batch_end().
+ *
+ * setjmp() is used only as the controlling expression of an if, one of the
+ * contexts C11 7.13.1.1 permits; assigning its result directly is undefined.
+ *
+ * This is a macro so that the jmp_buf captures the caller's frame. Locals
+ * the caller modifies between begin and end have indeterminate values after
+ * a replay unless they are volatile-qualified (C11 7.13.2.1).
+ */
+#define brw_batch_begin(batch, sz, ring) ({ \
+ int __ret = 0; \
+ if ((batch)->begin_count++ == 0) { \
+ __ret = __brw_batch_begin((batch), (sz), (ring)); \
+ if (__ret < 0) { \
+ /* Callers bail out without brw_batch_end(): undo the count. */ \
+ (batch)->begin_count--; \
+ } else if (__ret == 0) { \
+ if (setjmp((batch)->jmpbuf) != 0) \
+ __ret = 1; \
+ } \
+ } \
+ __ret; })
+
+/* Close a section opened with brw_batch_begin(); may longjmp() back to it. */
+void brw_batch_end(struct brw_batch *batch);
+
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index 4e76817661..da10c6fba6 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -178,7 +178,7 @@ brw_dispatch_compute_common(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
int estimated_buffer_space_needed;
- bool fail_next = false;
+ int ret;
if (!_mesa_check_conditional_render(ctx))
return;
@@ -196,40 +196,16 @@ brw_dispatch_compute_common(struct gl_context *ctx)
estimated_buffer_space_needed += 1024; /* push constants */
estimated_buffer_space_needed += 512; /* misc. pad */
- /* Flush the batch if it's approaching full, so that we don't wrap while
- * we've got validated state that needs to be in the same batch as the
- * primitives.
- */
- intel_batchbuffer_require_space(&brw->batch, estimated_buffer_space_needed,
- RENDER_RING);
- intel_batchbuffer_save_state(&brw->batch);
+ ret = brw_batch_begin(&brw->batch, estimated_buffer_space_needed,
+ RENDER_RING);
+ if (ret < 0)
+ return;
- retry:
- brw->batch.no_batch_wrap = true;
brw_upload_compute_state(brw);
brw_emit_gpgpu_walker(brw);
- brw->batch.no_batch_wrap = false;
-
- if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
- if (!fail_next) {
- intel_batchbuffer_reset_to_saved(&brw->batch);
- brw_batch_flush(&brw->batch, NULL);
- fail_next = true;
- goto retry;
- } else {
- if (brw_batch_flush(&brw->batch, NULL) == -ENOSPC) {
- static bool warned = false;
-
- if (!warned) {
- fprintf(stderr, "i965: Single compute shader dispatch "
- "exceeded available aperture space\n");
- warned = true;
- }
- }
- }
- }
+ brw_batch_end(&brw->batch);
/* Now that we know we haven't run out of aperture space, we can safely
* reset the dirty bits.
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index afbee5167b..e2007774d7 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -442,7 +442,6 @@ brw_try_draw_prims(struct gl_context *ctx,
{
struct brw_context *brw = brw_context(ctx);
GLuint i;
- bool fail_next = false;
if (ctx->NewState)
_mesa_update_state(ctx);
@@ -495,6 +494,7 @@ brw_try_draw_prims(struct gl_context *ctx,
for (i = 0; i < nr_prims; i++) {
int estimated_max_prim_size;
const int sampler_state_size = 16;
+ int ret;
estimated_max_prim_size = 512; /* batchbuffer commands */
estimated_max_prim_size += BRW_MAX_TEX_UNIT *
@@ -507,9 +507,9 @@ brw_try_draw_prims(struct gl_context *ctx,
* we've got validated state that needs to be in the same batch as the
* primitives.
*/
- intel_batchbuffer_require_space(&brw->batch,
- estimated_max_prim_size, RENDER_RING);
- intel_batchbuffer_save_state(&brw->batch);
+ ret = brw_batch_begin(&brw->batch, estimated_max_prim_size, RENDER_RING);
+ if (ret < 0)
+ break;
if (brw->num_instances != prims[i].num_instances ||
brw->basevertex != prims[i].basevertex ||
@@ -581,35 +581,16 @@ brw_try_draw_prims(struct gl_context *ctx,
else
gen6_set_prim(brw, &prims[i]);
-retry:
-
/* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and
* that the state updated in the loop outside of this block is that in
- * *_set_prim or brw_batch_flush(), which only impacts
- * brw->ctx.NewDriverState.
+ * *_set_prim, which only impacts brw->ctx.NewDriverState.
*/
- if (brw->ctx.NewDriverState) {
- brw->batch.no_batch_wrap = true;
+ if (brw->ctx.NewDriverState)
brw_upload_render_state(brw);
- }
brw_emit_prim(brw, &prims[i], brw->primitive, xfb_obj, stream);
- brw->batch.no_batch_wrap = false;
-
- if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
- if (!fail_next) {
- intel_batchbuffer_reset_to_saved(&brw->batch);
- brw_batch_flush(&brw->batch, NULL);
- fail_next = true;
- goto retry;
- } else {
- int ret = brw_batch_flush(&brw->batch, NULL);
- WARN_ONCE(ret == -ENOSPC,
- "i965: Single primitive emit exceeded "
- "available aperture space\n");
- }
- }
+ brw_batch_end(&brw->batch);
/* Now that we know we haven't run out of aperture space, we can safely
* reset the dirty bits.
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 6b75a3b727..58a891f383 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -157,9 +157,10 @@ genX(blorp_exec)(struct blorp_batch *batch,
{
assert(batch->blorp->driver_ctx == batch->driver_batch);
struct brw_context *brw = batch->driver_batch;
- struct gl_context *ctx = &brw->ctx;
const uint32_t estimated_max_batch_usage = GEN_GEN >= 8 ? 1800 : 1500;
- bool check_aperture_failed_once = false;
+
+ if (brw_batch_begin(&brw->batch, estimated_max_batch_usage, RENDER_RING) < 0)
+ return;
/* Flush the sampler and render caches. We definitely need to flush the
* sampler cache so that we get updated contents from the render cache for
@@ -172,14 +173,6 @@ genX(blorp_exec)(struct blorp_batch *batch,
brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
-retry:
- intel_batchbuffer_require_space(&brw->batch,
- estimated_max_batch_usage, RENDER_RING);
- intel_batchbuffer_save_state(&brw->batch);
- brw_bo *saved_bo = brw->batch.bo;
- uint32_t saved_used = USED_BATCH(&brw->batch);
- uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;
-
#if GEN_GEN == 6
/* Emit workaround flushes when we switch from drawing to blorping. */
brw_emit_post_sync_nonzero_flush(brw);
@@ -203,35 +196,12 @@ retry:
blorp_exec(batch, params);
- /* Make sure we didn't wrap the batch unintentionally, and make sure we
- * reserved enough space that a wrap will never happen.
- */
- assert(brw->batch.bo == saved_bo);
- assert((USED_BATCH(brw->batch) - saved_used) * 4 +
- (saved_state_batch_offset - brw->batch.state_batch_offset) <
- estimated_max_batch_usage);
- /* Shut up compiler warnings on release build */
- (void)saved_bo;
- (void)saved_used;
- (void)saved_state_batch_offset;
-
- /* Check if the blorp op we just did would make our batch likely to fail to
- * map all the BOs into the GPU at batch exec time later. If so, flush the
- * batch and try again with nothing else in the batch.
+ /* Flush the sampler cache so any texturing from the destination is
+ * coherent.
*/
- if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
- if (!check_aperture_failed_once) {
- check_aperture_failed_once = true;
- intel_batchbuffer_reset_to_saved(&brw->batch);
- brw_batch_flush(&brw->batch, NULL);
- goto retry;
- } else {
- int ret = brw_batch_flush(&brw->batch, NULL);
- WARN_ONCE(ret == -ENOSPC,
- "i965: blorp emit exceeded available aperture space\n");
- }
- }
+ brw_emit_mi_flush(brw);
+ brw_batch_end(&brw->batch);
brw_batch_maybe_flush(&brw->batch);
/* We've smashed all state compared to what the normal 3D pipeline
@@ -240,9 +210,4 @@ retry:
brw->ctx.NewDriverState |= BRW_NEW_BLORP;
brw->no_depth_or_stencil = false;
brw->ib.type = -1;
-
- /* Flush the sampler cache so any texturing from the destination is
- * coherent.
- */
- brw_emit_mi_flush(brw);
}
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index e0ba259e5e..4c5a640aff 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -348,7 +348,9 @@ brw_batch_flush(struct brw_batch *batch, struct perf_debug *info)
brw->batch.reserved_space = 0;
+ brw->batch.begin_count++;
brw_batch_finish_hook(&brw->batch);
+ brw->batch.begin_count--;
/* Mark the end of the buffer. */
intel_batchbuffer_emit_dword(&brw->batch, MI_BATCH_BUFFER_END);
@@ -431,3 +433,60 @@ intel_batchbuffer_data(struct brw_context *brw,
memcpy(brw->batch.map_next, data, bytes);
brw->batch.map_next += bytes >> 2;
}
+/*
+ * Outermost-level setup for a brw_batch_begin()/brw_batch_end() section.
+ *
+ * Called by the brw_batch_begin() macro only when begin_count has just gone
+ * from 0 to 1, i.e. not for nested sections. It requests sz_bytes of batch
+ * space on the given ring, saves the batch state so brw_batch_end() can
+ * reset to it, and sets no_batch_wrap for the duration of the section.
+ *
+ * Returns 0; no failure path exists at present, although callers test for
+ * a negative result.
+ */
+int __brw_batch_begin(struct brw_batch *batch,
+ const int sz_bytes,
+ enum brw_gpu_ring ring)
+{
+ assert(batch->begin_count == 1);
+ assert(!batch->repeat);
+ /* Reserve the space first, then snapshot: the snapshot is the rollback
+  * point used by intel_batchbuffer_reset_to_saved() in brw_batch_end().
+  * NOTE(review): presumably require_space may flush when the batch is
+  * nearly full, as the comment removed from brw_compute.c said - confirm.
+  */
+ intel_batchbuffer_require_space(batch, sz_bytes, ring);
+ intel_batchbuffer_save_state(batch);
+ /* Sections must not already be inside a no-wrap region. */
+ assert(!batch->no_batch_wrap);
+ batch->no_batch_wrap = true;
+
+ return 0;
+}
+/*
+ * Close a section opened with brw_batch_begin().
+ *
+ * Nested ends only decrement begin_count. The outermost end clears
+ * no_batch_wrap and runs the aperture check on the batch bo:
+ *  - check passes: clear the repeat flag and return;
+ *  - first failure: reset the batch to the state saved at begin, flush it,
+ *    re-arm the section and longjmp() to the jmp_buf recorded by the
+ *    brw_batch_begin() macro, so the caller re-emits its commands. This
+ *    path does not return;
+ *  - failure on the replay: flush anyway, and warn if the flush reports
+ *    -ENOSPC.
+ */
+void brw_batch_end(struct brw_batch *batch)
+{
+ assert(batch->begin_count);
+ if (--batch->begin_count)
+ return;
+ /* Outermost end: the section is over, wrapping is allowed again. */
+ assert(batch->no_batch_wrap);
+ batch->no_batch_wrap = false;
+ /* A non-zero result means the aperture check failed. */
+ if (dri_bufmgr_check_aperture_space(&batch->bo, 1)) {
+ if (!batch->repeat) {
+ enum brw_gpu_ring ring = batch->ring;
+ /* Drop what this section emitted, then submit what came before it. */
+ intel_batchbuffer_reset_to_saved(batch);
+ brw_batch_flush(batch, NULL);
+ /* Re-create the state __brw_batch_begin() left behind, since the
+  * longjmp() lands after that call and it will not run again.
+  */
+ batch->begin_count++;
+ batch->no_batch_wrap = true;
+ /* NOTE(review): presumably the flush resets batch->ring - confirm. */
+ batch->ring = ring;
+ batch->repeat = true;
+ longjmp(batch->jmpbuf, 1);
+ }
+ /* Already replayed once: submit regardless and report if it cannot fit. */
+ if (unlikely(brw_batch_flush(batch, NULL) == -ENOSPC)) {
+ static GLuint msg_id;
+ if (!msg_id) { /* presumably set by _mesa_gl_debug(), so this warns once - confirm */
+ struct brw_context *brw = container_of(batch, brw, batch);
+ fprintf(stderr, "WARNING: Aperture space exceeded!\n");
+ _mesa_gl_debug(&brw->ctx, &msg_id,
+ MESA_DEBUG_SOURCE_API,
+ MESA_DEBUG_TYPE_OTHER,
+ MESA_DEBUG_SEVERITY_HIGH,
+ "Aperture space exceeded!\n");
+ }
+ }
+ }
+ /* The section is complete; the next outermost begin starts fresh. */
+ batch->repeat = false;
+}
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 8c832ac249..6208cc5e18 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -591,10 +591,9 @@ intelEmitCopyBlit(struct brw_context *brw,
GLshort w, GLshort h,
GLenum logic_op)
{
- GLuint CMD, BR13, pass = 0;
+ GLuint CMD, BR13;
int dst_y2 = dst_y + h;
int dst_x2 = dst_x + w;
- brw_bo *aper_array[3];
bool dst_y_tiled = dst_tiling == I915_TILING_Y;
bool src_y_tiled = src_tiling == I915_TILING_Y;
bool use_fast_copy_blit = false;
@@ -604,25 +603,8 @@ intelEmitCopyBlit(struct brw_context *brw,
if ((dst_y_tiled || src_y_tiled) && brw->gen < 6)
return false;
- /* do space check before going any further */
- do {
- aper_array[0] = brw->batch.bo;
- aper_array[1] = dst_buffer;
- aper_array[2] = src_buffer;
-
- if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
- brw_batch_flush(&brw->batch, NULL);
- pass++;
- } else
- break;
- } while (pass < 2);
-
- if (pass >= 2)
- return false;
-
- unsigned length = brw->gen >= 8 ? 10 : 8;
+ const unsigned length = brw->gen >= 8 ? 10 : 8;
- intel_batchbuffer_require_space(&brw->batch, length * 4, BLT_RING);
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__func__,
src_buffer, src_pitch, src_offset, src_x, src_y,
@@ -732,6 +714,9 @@ intelEmitCopyBlit(struct brw_context *brw,
assert(dst_x < dst_x2);
assert(dst_y < dst_y2);
+ if (brw_batch_begin(&brw->batch, 100, BLT_RING) < 0)
+ return false;
+
BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled);
OUT_BATCH(CMD | (length - 2));
OUT_BATCH(BR13 | (uint16_t)dst_pitch);
@@ -762,6 +747,7 @@ intelEmitCopyBlit(struct brw_context *brw,
brw_emit_mi_flush(brw);
+ brw_batch_end(&brw->batch);
return true;
}
@@ -799,10 +785,6 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
unsigned xy_setup_blt_length = brw->gen >= 8 ? 10 : 8;
- intel_batchbuffer_require_space(&brw->batch,
- (xy_setup_blt_length * 4) +
- (3 * 4) + dwords * 4, BLT_RING);
-
opcode = XY_SETUP_BLT_CMD;
if (cpp == 4)
opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
@@ -818,6 +800,9 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
if (dst_tiling != I915_TILING_NONE)
blit_cmd |= XY_DST_TILED;
+ if (brw_batch_begin(&brw->batch, 60 + 4*dwords, BLT_RING) < 0)
+ return false;
+
BEGIN_BATCH_BLT(xy_setup_blt_length + 3);
OUT_BATCH(opcode | (xy_setup_blt_length - 2));
OUT_BATCH(br13);
@@ -847,6 +832,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
brw_emit_mi_flush(brw);
+ brw_batch_end(&brw->batch);
return true;
}
@@ -921,7 +907,6 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
{
uint32_t BR13, CMD;
int pitch, cpp;
- brw_bo *aper_array[2];
pitch = mt->pitch;
cpp = mt->cpp;
@@ -939,16 +924,10 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
}
BR13 |= pitch;
- /* do space check before going any further */
- aper_array[0] = brw->batch.bo;
- aper_array[1] = mt->bo;
+ if (brw_batch_begin(&brw->batch, 80, BLT_RING) < 0)
+ return;
- if (drm_intel_bufmgr_check_aperture_space(aper_array,
- ARRAY_SIZE(aper_array)) != 0) {
- brw_batch_flush(&brw->batch, NULL);
- }
-
- unsigned length = brw->gen >= 8 ? 7 : 6;
+ const unsigned length = brw->gen >= 8 ? 7 : 6;
bool dst_y_tiled = mt->tiling == I915_TILING_Y;
/* We need to split the blit into chunks that each fit within the blitter's
@@ -991,4 +970,6 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
}
brw_emit_mi_flush(brw);
+
+ brw_batch_end(&brw->batch);
}
--
2.11.0
More information about the mesa-dev
mailing list