[Mesa-dev] [PATCH 39/70] i965: Refactor aperture testing and restarting
Chris Wilson
chris at chris-wilson.co.uk
Fri Aug 7 13:13:43 PDT 2015
Refactor the aperture test, rollback and retry logic into a common idiom: a brw_batch_begin() / brw_batch_end() pair that wraps each batch emission.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
src/mesa/drivers/dri/i965/brw_batch.h | 9 +++++
src/mesa/drivers/dri/i965/brw_blorp.cpp | 43 +++++------------------
src/mesa/drivers/dri/i965/brw_compute.c | 39 +++++----------------
src/mesa/drivers/dri/i965/brw_draw.c | 37 ++++++--------------
src/mesa/drivers/dri/i965/intel_batchbuffer.c | 50 +++++++++++++++++++++++++++
src/mesa/drivers/dri/i965/intel_blit.c | 47 +++++++------------------
6 files changed, 99 insertions(+), 126 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index 4a50e1b..bef544d 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -31,6 +31,8 @@
extern "C" {
#endif
+#include <setjmp.h>
+
#include <intel_bufmgr.h>
#include "util/list.h"
@@ -63,6 +65,9 @@ typedef struct brw_batch {
bool needs_sol_reset;
int gen;
+ jmp_buf jmpbuf;
+ bool repeat;
+ unsigned begin_count;
bool no_batch_wrap;
struct {
@@ -261,6 +266,10 @@ intel_batchbuffer_require_space(struct brw_batch *batch, GLuint sz,
intel_batchbuffer_emit_render_ring_prelude(batch);
}
+int brw_batch_begin(struct brw_batch *batch,
+ const int sz_bytes,
+ enum brw_gpu_ring ring);
+int brw_batch_end(struct brw_batch *batch);
#ifdef __cplusplus
}
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index 0b0cc8d..d4d5457 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -210,7 +210,9 @@ brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
{
struct gl_context *ctx = &brw->ctx;
uint32_t estimated_max_batch_usage = 1500;
- bool check_aperture_failed_once = false;
+
+ if (brw_batch_begin(&brw->batch, estimated_max_batch_usage, RENDER_RING) < 0)
+ return;
/* Flush the sampler and render caches. We definitely need to flush the
* sampler cache so that we get updated contents from the render cache for
@@ -221,13 +223,6 @@ brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
*/
brw_emit_mi_flush(brw);
-retry:
- intel_batchbuffer_require_space(&brw->batch, estimated_max_batch_usage, RENDER_RING);
- intel_batchbuffer_save_state(&brw->batch);
- drm_intel_bo *saved_bo = brw->batch.bo;
- uint32_t saved_used = USED_BATCH(&brw->batch);
- uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;
-
switch (brw->gen) {
case 6:
gen6_blorp_exec(brw, params);
@@ -240,33 +235,18 @@ retry:
unreachable("not reached");
}
- /* Make sure we didn't wrap the batch unintentionally, and make sure we
- * reserved enough space that a wrap will never happen.
+ /* Flush the sampler cache so any texturing from the destination is
+ * coherent.
*/
- assert(brw->batch.bo == saved_bo);
- assert((USED_BATCH(&brw->batch) - saved_used) * 4 +
- (saved_state_batch_offset - brw->batch.state_batch_offset) <
- estimated_max_batch_usage);
- /* Shut up compiler warnings on release build */
- (void)saved_bo;
- (void)saved_used;
- (void)saved_state_batch_offset;
+ brw_emit_mi_flush(brw);
/* Check if the blorp op we just did would make our batch likely to fail to
* map all the BOs into the GPU at batch exec time later. If so, flush the
* batch and try again with nothing else in the batch.
*/
- if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
- if (!check_aperture_failed_once) {
- check_aperture_failed_once = true;
- intel_batchbuffer_reset_to_saved(&brw->batch);
- brw_batch_flush(&brw->batch, NULL);
- goto retry;
- } else {
- int ret = brw_batch_flush(&brw->batch, NULL);
- WARN_ONCE(ret == -ENOSPC,
- "i965: blorp emit exceeded available aperture space\n");
- }
+ if (brw_batch_end(&brw->batch)) {
+ WARN_ONCE(1, "i965: blorp emit exceeded available aperture space\n");
+ return;
}
brw_batch_maybe_flush(&brw->batch);
@@ -277,11 +257,6 @@ retry:
brw->ctx.NewDriverState = ~0ull;
brw->no_depth_or_stencil = false;
brw->ib.type = -1;
-
- /* Flush the sampler cache so any texturing from the destination is
- * coherent.
- */
- brw_emit_mi_flush(brw);
}
brw_hiz_op_params::brw_hiz_op_params(struct intel_mipmap_tree *mt,
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index 4bff716..817d9ef 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -86,7 +86,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
{
struct brw_context *brw = brw_context(ctx);
int estimated_buffer_space_needed;
- bool fail_next = false;
+ int ret;
if (!_mesa_check_conditional_render(ctx))
return;
@@ -104,40 +104,17 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
estimated_buffer_space_needed += 1024; /* push constants */
estimated_buffer_space_needed += 512; /* misc. pad */
- /* Flush the batch if it's approaching full, so that we don't wrap while
- * we've got validated state that needs to be in the same batch as the
- * primitives.
- */
- intel_batchbuffer_require_space(&brw->batch, estimated_buffer_space_needed,
- RENDER_RING);
- intel_batchbuffer_save_state(&brw->batch);
+ ret = brw_batch_begin(&brw->batch, estimated_buffer_space_needed,
+ RENDER_RING);
+ if (ret < 0)
+ return;
- retry:
- brw->batch.no_batch_wrap = true;
brw_upload_compute_state(brw);
-
brw_emit_gpgpu_walker(brw, num_groups);
- brw->batch.no_batch_wrap = false;
-
- if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
- if (!fail_next) {
- intel_batchbuffer_reset_to_saved(&brw->batch);
- brw_batch_flush(&brw->batch, NULL);
- fail_next = true;
- goto retry;
- } else {
- if (brw_batch_flush(&brw->batch, NULL) == -ENOSPC) {
- static bool warned = false;
-
- if (!warned) {
- fprintf(stderr, "i965: Single compute shader dispatch "
- "exceeded available aperture space\n");
- warned = true;
- }
- }
- }
- }
+ WARN_ONCE(brw_batch_end(&brw->batch) == -ENOSPC,
+ "i965: Single compute shader dispatch exceeded "
+ "available aperture space\n");
/* Now that we know we haven't run out of aperture space, we can safely
* reset the dirty bits.
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index c2e7bda..c2ae0fc 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -404,7 +404,6 @@ brw_try_draw_prims(struct gl_context *ctx,
{
struct brw_context *brw = brw_context(ctx);
GLuint i;
- bool fail_next = false;
if (ctx->NewState)
_mesa_update_state(ctx);
@@ -451,6 +450,7 @@ brw_try_draw_prims(struct gl_context *ctx,
for (i = 0; i < nr_prims; i++) {
int estimated_max_prim_size;
const int sampler_state_size = 16;
+ int ret;
estimated_max_prim_size = 512; /* batchbuffer commands */
estimated_max_prim_size += BRW_MAX_TEX_UNIT *
@@ -463,9 +463,9 @@ brw_try_draw_prims(struct gl_context *ctx,
* we've got validated state that needs to be in the same batch as the
* primitives.
*/
- intel_batchbuffer_require_space(&brw->batch,
- estimated_max_prim_size, RENDER_RING);
- intel_batchbuffer_save_state(&brw->batch);
+ ret = brw_batch_begin(&brw->batch, estimated_max_prim_size, RENDER_RING);
+ if (ret < 0)
+ break;
if (brw->num_instances != prims[i].num_instances ||
brw->basevertex != prims[i].basevertex) {
@@ -501,35 +501,18 @@ brw_try_draw_prims(struct gl_context *ctx,
else
gen6_set_prim(brw, &prims[i]);
-retry:
-
/* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and
* that the state updated in the loop outside of this block is that in
- * *_set_prim or brw_batch_flush(), which only impacts
- * brw->ctx.NewDriverState.
+ * *_set_prim, which only impacts brw->ctx.NewDriverState.
*/
- if (brw->ctx.NewDriverState) {
- brw->batch.no_batch_wrap = true;
- brw_upload_render_state(brw);
- }
+ if (brw->ctx.NewDriverState)
+ brw_upload_render_state(brw);
brw_emit_prim(brw, &prims[i], brw->primitive);
- brw->batch.no_batch_wrap = false;
-
- if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
- if (!fail_next) {
- intel_batchbuffer_reset_to_saved(&brw->batch);
- brw_batch_flush(&brw->batch, NULL);
- fail_next = true;
- goto retry;
- } else {
- int ret = brw_batch_flush(&brw->batch, NULL);
- WARN_ONCE(ret == -ENOSPC,
- "i965: Single primitive emit exceeded "
- "available aperture space\n");
- }
- }
+ WARN_ONCE(brw_batch_end(&brw->batch) == -ENOSPC,
+ "i965: Single primitive emit exceeded "
+ "available aperture space\n");
/* Now that we know we haven't run out of aperture space, we can safely
* reset the dirty bits.
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 2b5acd1..49c1c27 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -321,7 +321,9 @@ brw_batch_flush(struct brw_batch *batch, struct perf_debug *info)
brw->batch.reserved_space = 0;
+ brw->batch.begin_count++;
brw_batch_finish_hook(&brw->batch);
+ brw->batch.begin_count--;
/* Mark the end of the buffer. */
intel_batchbuffer_emit_dword(brw, MI_BATCH_BUFFER_END);
@@ -404,3 +406,51 @@ intel_batchbuffer_data(struct brw_context *brw,
memcpy(brw->batch.map_next, data, bytes);
brw->batch.map_next += bytes >> 2;
}
+
+int brw_batch_begin(struct brw_batch *batch, /* Opens a (nestable) emission section; closed by brw_batch_end(). */
+ const int sz_bytes, /* space requested from intel_batchbuffer_require_space() */
+ enum brw_gpu_ring ring) /* ring passed through to intel_batchbuffer_require_space() */
+{
+ if (batch->begin_count++) /* already inside a section: only the nesting depth is bumped */
+ return 0;
+
+ intel_batchbuffer_require_space(batch, sz_bytes, ring);
+ intel_batchbuffer_save_state(batch); /* presumably the state brw_batch_end() rewinds to via intel_batchbuffer_reset_to_saved() */
+
+ batch->repeat = false; /* no retry has been attempted for this section yet */
+ batch->no_batch_wrap = true; /* cleared again by the outermost brw_batch_end() */
+
+ return setjmp(batch->jmpbuf); /* 0 on first entry, 1 when brw_batch_end() longjmp()s back; callers test < 0, which no visible path produces. NOTE(review): this frame has already returned by the time longjmp() runs, and `return setjmp()` is not a permitted setjmp context (C11 7.13) - undefined behaviour; setjmp likely needs to be invoked in the caller's frame, e.g. via a macro - confirm */
+}
+
+int brw_batch_end(struct brw_batch *batch) /* Closes a section opened by brw_batch_begin(); returns 0, or the brw_batch_flush() result after a failed retry. */
+{
+ assert(batch->begin_count); /* must be paired with an earlier brw_batch_begin() */
+ if (--batch->begin_count) /* still inside an outer section: defer the aperture check to the outermost end */
+ return 0;
+
+ batch->no_batch_wrap = false;
+
+ if (dri_bufmgr_check_aperture_space(&batch->bo, 1)) { /* non-zero is treated as "batch does not fit" */
+ if (!batch->repeat) { /* first failure: roll back, flush, then replay the section once */
+ enum brw_gpu_ring ring = batch->ring; /* remembered so it can be restored after the flush below */
+
+ intel_batchbuffer_reset_to_saved(batch);
+ brw_batch_flush(batch, NULL);
+
+ batch->begin_count++; /* re-open the section for the replay */
+ batch->no_batch_wrap = true;
+
+ batch->ring = ring;
+ if (ring == RENDER_RING)
+ intel_batchbuffer_emit_render_ring_prelude(batch);
+
+ batch->repeat = true; /* a second failure takes the flush-and-return path below */
+ longjmp(batch->jmpbuf, 1); /* intended to resume at the setjmp() in brw_batch_begin(), making it return 1. NOTE(review): that function has already returned, so this jump is undefined behaviour per C11 7.13.2.1 - confirm */
+ }
+
+ return brw_batch_flush(batch, NULL); /* retry already used: flush as-is and hand the flush result to the caller */
+ }
+
+ return 0;
+}
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 22a5c5d..b6ad969 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -521,10 +521,9 @@ intelEmitCopyBlit(struct brw_context *brw,
GLshort w, GLshort h,
GLenum logic_op)
{
- GLuint CMD, BR13, pass = 0;
+ GLuint CMD, BR13;
int dst_y2 = dst_y + h;
int dst_x2 = dst_x + w;
- brw_bo *aper_array[3];
bool dst_y_tiled = dst_tiling == I915_TILING_Y;
bool src_y_tiled = src_tiling == I915_TILING_Y;
bool use_fast_copy_blit = false;
@@ -532,25 +531,8 @@ intelEmitCopyBlit(struct brw_context *brw,
if ((dst_y_tiled || src_y_tiled) && brw->gen < 6)
return false;
- /* do space check before going any further */
- do {
- aper_array[0] = brw->batch.bo;
- aper_array[1] = dst_buffer;
- aper_array[2] = src_buffer;
-
- if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
- brw_batch_flush(&brw->batch, NULL);
- pass++;
- } else
- break;
- } while (pass < 2);
-
- if (pass >= 2)
- return false;
-
unsigned length = brw->gen >= 8 ? 10 : 8;
- intel_batchbuffer_require_space(&brw->batch, length * 4, BLT_RING);
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__func__,
src_buffer, src_pitch, src_offset, src_x, src_y,
@@ -661,6 +643,9 @@ intelEmitCopyBlit(struct brw_context *brw,
assert(dst_offset + (dst_y + h - 1) * abs(dst_pitch) +
(w * cpp) <= dst_buffer->size);
+ if (brw_batch_begin(&brw->batch, 60, BLT_RING) < 0)
+ return false;
+
BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled);
OUT_BATCH(CMD | (length - 2));
OUT_BATCH(BR13 | (uint16_t)dst_pitch);
@@ -691,7 +676,7 @@ intelEmitCopyBlit(struct brw_context *brw,
brw_emit_mi_flush(brw);
- return true;
+ return brw_batch_end(&brw->batch) == 0;
}
bool
@@ -728,10 +713,6 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
unsigned xy_setup_blt_length = brw->gen >= 8 ? 10 : 8;
- intel_batchbuffer_require_space(&brw->batch,
- (xy_setup_blt_length * 4) +
- (3 * 4) + dwords * 4, BLT_RING);
-
opcode = XY_SETUP_BLT_CMD;
if (cpp == 4)
opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
@@ -747,6 +728,9 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
if (dst_tiling != I915_TILING_NONE)
blit_cmd |= XY_DST_TILED;
+ if (brw_batch_begin(&brw->batch, 20 + dwords, BLT_RING) < 0)
+ return false;
+
BEGIN_BATCH_BLT(xy_setup_blt_length + 3);
OUT_BATCH(opcode | (xy_setup_blt_length - 2));
OUT_BATCH(br13);
@@ -776,7 +760,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
brw_emit_mi_flush(brw);
- return true;
+ return brw_batch_end(&brw->batch) == 0;
}
/* We don't have a memmove-type blit like some other hardware, so we'll do a
@@ -854,7 +838,6 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
{
uint32_t BR13, CMD;
int pitch, cpp;
- brw_bo *aper_array[2];
pitch = mt->pitch;
cpp = mt->cpp;
@@ -872,14 +855,8 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
}
BR13 |= pitch;
- /* do space check before going any further */
- aper_array[0] = brw->batch.bo;
- aper_array[1] = mt->bo;
-
- if (drm_intel_bufmgr_check_aperture_space(aper_array,
- ARRAY_SIZE(aper_array)) != 0) {
- brw_batch_flush(&brw->batch, NULL);
- }
+ if (brw_batch_begin(&brw->batch, 20, BLT_RING) < 0)
+ return;
unsigned length = brw->gen >= 8 ? 7 : 6;
bool dst_y_tiled = mt->tiling == I915_TILING_Y;
@@ -902,4 +879,6 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
ADVANCE_BATCH_TILED(dst_y_tiled, false);
brw_emit_mi_flush(brw);
+
+ brw_batch_end(&brw->batch);
}
--
2.5.0
More information about the mesa-dev
mailing list