[Mesa-dev] [PATCH 49/51] i965: Enable brw-batch dirty tracking
Chris Wilson
chris at chris-wilson.co.uk
Tue Jan 10 21:24:12 UTC 2017
Remove the old render-cache hash-set approach and switch over to the
inline write tracking provided by brw-batch.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
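The scheme reduces to a per-bo dirty flag plus a batch-wide summary bit:
marking is a field store instead of a hash-set insert, and a cache flush
(or the end of the batch) cleans every written buffer in one pass over
the request's write-fence list. A minimal standalone sketch of that idea
follows, with made-up sketch_* names and a plain linked list standing in
for the fence list; it is an illustration of the technique, not the
driver code.

#include <stdbool.h>
#include <stddef.h>

struct sketch_bo {
   bool dirty;                /* written by the current batch? */
   struct sketch_bo *next;    /* chain of bos dirtied this batch */
};

struct sketch_batch {
   unsigned flags;
#define SKETCH_BATCH_DIRTY (1u << 31)
   struct sketch_bo *writes;  /* bos dirtied since the last flush */
};

static void sketch_mark_dirty(struct sketch_batch *batch,
                              struct sketch_bo *bo)
{
   if (bo->dirty)
      return;

   /* First write since the last flush: track it and set the summary. */
   bo->dirty = true;
   bo->next = batch->writes;
   batch->writes = bo;
   batch->flags |= SKETCH_BATCH_DIRTY;
}

static void sketch_clear_dirty(struct sketch_batch *batch)
{
   /* A global cache flush cleans every dirty buffer at once. */
   if (!(batch->flags & SKETCH_BATCH_DIRTY))
      return;

   for (struct sketch_bo *bo = batch->writes; bo; bo = bo->next)
      bo->dirty = false;
   batch->writes = NULL;
   batch->flags &= ~SKETCH_BATCH_DIRTY;
}
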
src/mesa/drivers/dri/i965/brw_batch.c | 70 ++++++++++++++++++++++++++-
src/mesa/drivers/dri/i965/brw_batch.h | 10 +---
src/mesa/drivers/dri/i965/brw_context.c | 24 +++++----
src/mesa/drivers/dri/i965/brw_context.h | 17 ++++++-
src/mesa/drivers/dri/i965/brw_misc_state.c | 4 +-
src/mesa/drivers/dri/i965/brw_pipe_control.c | 2 +-
src/mesa/drivers/dri/i965/gen8_depth_state.c | 2 +-
src/mesa/drivers/dri/i965/intel_blit.c | 3 +-
src/mesa/drivers/dri/i965/intel_fbo.c | 38 ---------------
src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 2 +-
10 files changed, 108 insertions(+), 64 deletions(-)
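
The intel_update_state() hunk below also changes the flush policy:
rather than emitting one MI flush per dirty texture as each check fires,
the checks are ORed into a local flag and a single flush is emitted
afterwards, since one pipeline flush cleans every cache anyway. A
compilable toy of that pattern, with mock toy_* types rather than the
driver's:

#include <stdbool.h>

struct toy_bo { bool dirty; };

static int toy_flushes;                 /* counts flushes for the demo */
static void toy_mi_flush(void) { toy_flushes++; }

/* N dirty buffers cost one flush, not N. */
static void toy_update_state(struct toy_bo *bos[], int n)
{
   bool flush = false;
   for (int i = 0; i < n; i++)
      flush |= bos[i]->dirty;
   if (flush)
      toy_mi_flush();
}
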
diff --git a/src/mesa/drivers/dri/i965/brw_batch.c b/src/mesa/drivers/dri/i965/brw_batch.c
index b257d000f8..515a81bf89 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_batch.c
@@ -990,6 +990,8 @@ uint64_t __brw_batch_reloc(struct brw_batch *batch,
batch->needs_pipecontrol_ggtt_wa)
target_bo->exec->flags |= EXEC_OBJECT_NEEDS_GTT;
}
+
+ batch->flags |= BATCH_DIRTY;
}
return __brw_reloc_address(target_bo, target_offset);
@@ -1056,6 +1058,72 @@ static uint32_t __brw_batch_emit_seqno(struct brw_batch *batch,
}
/*
+ * Mark a bo as being written to by this batch.
+ *
+ * We frequently dirty a buffer and then emit a global cache flush
+ * cleaning all the dirty buffers within a batch. Afterwards, we may
+ * then write to the same buffer, but may not re-emit a relocation and
+ * so we need to mark the buffer as dirty again ourselves. Normally
+ * we can rely on the relocation to mark the write buffers as dirty.
+ *
+ * All caches are flushed by the kernel between batches, so at the end
+ * of each batch we can mark all buffers as clean again. (Any later
+ * access to the buffer, whether by the GPU in the next batch or by the
+ * CPU following a set-domain call, takes place after that flush has
+ * finished.)
+ */
+void brw_bo_mark_dirty(struct brw_batch *batch, struct brw_bo *bo)
+{
+ if (unlikely(bo->batch != batch)) {
+ bo = __brw_batch_lookup_handle(batch, bo->handle);
+ assert(bo);
+ }
+ assert(bo->batch == batch);
+ assert(bo != bo->batch->bo);
+
+ /* We should only be called on objects already in the batch for writing */
+ if (bo->exec == NULL)
+ return;
+
+ assert(brw_fence_get_request(&bo->read[batch->ring]) == batch->next_request);
+ assert(brw_fence_get_request(&bo->write) == batch->next_request);
+
+ if (bo->dirty)
+ return;
+
+ list_move(&bo->write.link, &batch->next_request->fences);
+ bo->dirty = true;
+ batch->flags |= BATCH_DIRTY;
+}
+
+/*
+ * At the end of each batch and when explicitly flushing caches within
+ * a batch, we can mark all the buffers within that batch as now clean.
+ */
+void brw_batch_clear_dirty(struct brw_batch *batch)
+{
+ struct list_head * const list = &batch->next_request->fences;
+
+ if (!(batch->flags & BATCH_DIRTY))
+ return;
+
+ list_for_each_entry(struct brw_fence, fence, list, link) {
+ struct brw_bo *bo;
+
+ if (brw_fence_get_signal(fence) != WRITE_SIGNAL)
+ break;
+
+ bo = container_of(fence, bo, write);
+ if (!bo->dirty)
+ break;
+
+ bo->dirty = false;
+ }
+
+ batch->flags &= ~BATCH_DIRTY;
+}
+
+/*
* Close the batch by writing all the tail commands (to store register
* values between batches, disable profiling, etc). And then to end it all
* we set MI_BATCH_BUFFER_END.
@@ -1397,8 +1465,6 @@ skip:
__brw_batch_throttle(batch, rq);
__brw_batch_retire(batch);
- brw_batch_clear_dirty(batch);
-
return __brw_batch_next(batch);
}
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index 264868f253..074a13f550 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -107,7 +107,8 @@ typedef struct brw_batch {
uint32_t *tail;
uint32_t flags;
-#define BATCH_HAS_STATE_BASE (1 << 31)
+#define BATCH_DIRTY (1 << 31)
+#define BATCH_HAS_STATE_BASE (1 << 30)
uint32_t base_flags;
enum brw_gpu_ring ring;
@@ -185,13 +186,6 @@ typedef struct brw_batch {
struct list_head borrowed[1<<BORROWED_BITS];
struct brw_bo *freed_bo;
-
- /**
- * Set of brw_bo* that have been rendered to within this batchbuffer
- * and would need flushing before being used from another cache domain that
- * isn't coherent with it (i.e. the sampler).
- */
- struct set *render_cache;
} brw_batch;
int brw_batch_init(struct brw_batch *batch,
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 15e467b00a..488d76be8e 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -228,6 +228,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
struct brw_context *brw = brw_context(ctx);
struct intel_texture_object *tex_obj;
struct intel_renderbuffer *depth_irb;
+ bool flush = false;
if (ctx->swrast_context)
_swrast_InvalidateState(ctx, new_state);
@@ -263,8 +264,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
const int flags = intel_texture_view_requires_resolve(brw, tex_obj) ?
0 : INTEL_MIPTREE_IGNORE_CCS_E;
intel_miptree_all_slices_resolve_color(brw, tex_obj->mt, flags);
- if (brw_check_dirty(brw, tex_obj->mt->bo))
- brw_mi_flush(brw, RENDER_RING);
+ flush |= brw_check_dirty(tex_obj->mt->bo);
if (tex_obj->base.StencilSampling ||
tex_obj->mt->format == MESA_FORMAT_S_UINT8) {
@@ -283,8 +283,14 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
struct gl_image_unit *u =
&ctx->ImageUnits[shader->Program->sh.ImageUnits[j]];
tex_obj = intel_texture_object(u->TexObj);
+ if (!tex_obj)
+ continue;
- if (tex_obj && tex_obj->mt) {
+ if (tex_obj->base.Target == GL_TEXTURE_BUFFER) {
+ struct intel_buffer_object *intel_obj =
+ intel_buffer_object(tex_obj->base.BufferObject);
+ flush |= brw_check_dirty(intel_obj->buffer);
+ } else if (tex_obj->mt) {
/* Access to images is implemented using indirect messages
* against data port. Normal render target write understands
* lossless compression but unfortunately the typed/untyped
@@ -300,8 +306,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
"off lossless compression");
}
- if (brw_check_dirty(brw, tex_obj->mt->bo))
- brw_mi_flush(brw, RENDER_RING);
+ flush |= brw_check_dirty(tex_obj->mt->bo);
}
}
}
@@ -321,8 +326,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
intel_miptree_resolve_color(
brw, irb->mt, irb->mt_level, irb->mt_layer, irb->layer_count,
INTEL_MIPTREE_IGNORE_CCS_E))
- if (brw_check_dirty(brw, irb->mt->bo))
- brw_emit_mi_flush(brw);
+ flush |= brw_check_dirty(irb->mt->bo);
}
}
@@ -353,11 +357,13 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
*/
assert(!intel_miptree_is_lossless_compressed(brw, mt));
intel_miptree_all_slices_resolve_color(brw, mt, 0);
- if (brw_check_dirty(brw, mt->bo))
- brw_mi_flush(brw, RENDER_RING);
+ flush |= brw_check_dirty(mt->bo);
}
}
+ if (flush)
+ brw_mi_flush(brw, RENDER_RING);
+
_mesa_lock_context_textures(ctx);
if (new_state & _NEW_BUFFERS) {
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 5e2df95508..cd31b730f5 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1680,7 +1680,22 @@ void gen7_emit_cs_stall_flush(struct brw_context *brw);
void brw_mi_flush(struct brw_context *brw, enum brw_gpu_ring ring);
void brw_pipe_control_flush(struct brw_context *brw, unsigned flags);
-bool brw_check_dirty(struct brw_context *ctx, brw_bo *bo);
+/**
+ * Emits an appropriate flush for a BO if it has been rendered to within the
+ * same batchbuffer as a read that's about to be emitted.
+ *
+ * The GPU has separate, incoherent caches for the render cache and the
+ * sampler cache, along with other caches. Usually data in the different
+ * caches don't interact (e.g. we don't render to our driver-generated
+ * immediate constant data), but for render-to-texture in FBOs we definitely
+ * do. When a batchbuffer is flushed, the kernel will ensure that everything
+ * necessary is flushed before another use of that BO, but for reuse from
+ * different caches within a batchbuffer, it's all our responsibility.
+ */
+static inline bool brw_check_dirty(brw_bo *bo)
+{
+ return bo->dirty;
+}
/* brw_queryformat.c */
void brw_query_internal_format(struct gl_context *ctx, GLenum target,
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index f4ea7449f6..9197057e49 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -551,9 +551,9 @@ brw_emit_depthbuffer(struct brw_context *brw)
height = stencil_irb->Base.Base.Height;
}
- if (depth_mt && brw_check_dirty(brw, depth_mt->bo))
+ if (depth_mt && brw_check_dirty(depth_mt->bo))
brw_emit_mi_flush(brw);
- if (stencil_mt && brw_check_dirty(brw, stencil_mt->bo))
+ if (stencil_mt && brw_check_dirty(stencil_mt->bo))
brw_emit_mi_flush(brw);
brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index aab3bf141a..40102f9aa6 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -363,7 +363,7 @@ brw_emit_post_sync_nonzero_flush(struct brw_context *brw)
void
brw_emit_mi_flush(struct brw_context *brw)
{
- if (brw_batch_count(&brw->batch) == 0)
+ if (!(brw->batch.flags & BATCH_DIRTY))
return;
if (brw->batch.ring == BLT_RING) {
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index 77424f593a..30f0f11432 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -513,7 +513,7 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
ADVANCE_BATCH();
/* Mark this buffer as needing a TC flush, as we've rendered to it. */
- brw_bo_mark_dirty(&brw->batch, mt->bo);
+ assert(mt->bo->dirty);
brw_batch_end(&brw->batch);
brw_batch_maybe_flush(&brw->batch);
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 9e961b03a2..825643cf0c 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -645,7 +645,8 @@ intelEmitCopyBlit(struct brw_context *brw,
*
* FIXME: Figure out a way to avoid flushing when not required.
*/
- brw_mi_flush(brw, BLT_RING);
+ if (brw_check_dirty(dst_buffer))
+ brw_mi_flush(brw, BLT_RING);
assert(cpp <= 16);
BR13 = br13_for_cpp(cpp);
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 8f8f7e5ad5..47f9e18a35 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -35,7 +35,6 @@
#include "main/image.h"
#include "main/condrender.h"
#include "util/hash_table.h"
-#include "util/set.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
@@ -1049,40 +1048,6 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
intel_miptree_release(&new_mt);
}
-void
-brw_batch_clear_dirty(brw_batch *batch)
-{
- struct set_entry *entry;
-
- set_foreach(batch->render_cache, entry) {
- _mesa_set_remove(batch->render_cache, entry);
- }
-}
-
-void
-brw_bo_mark_dirty(brw_batch *batch, brw_bo *bo)
-{
- _mesa_set_add(batch->render_cache, bo);
-}
-
-/**
- * Emits an appropriate flush for a BO if it has been rendered to within the
- * same batchbuffer as a read that's about to be emitted.
- *
- * The GPU has separate, incoherent caches for the render cache and the
- * sampler cache, along with other caches. Usually data in the different
- * caches don't interact (e.g. we don't render to our driver-generated
- * immediate constant data), but for render-to-texture in FBOs we definitely
- * do. When a batchbuffer is flushed, the kernel will ensure that everything
- * necessary is flushed before another use of that BO, but for reuse from
- * different caches within a batchbuffer, it's all our responsibility.
- */
-bool
-brw_check_dirty(struct brw_context *brw, brw_bo *bo)
-{
- return _mesa_set_search(brw->batch.render_cache, bo);
-}
-
/**
* Do one-time context initializations related to GL_EXT_framebuffer_object.
* Hook in device driver functions.
@@ -1102,7 +1067,4 @@ intel_fbo_init(struct brw_context *brw)
dd->BlitFramebuffer = gen4_blit_framebuffer;
dd->EGLImageTargetRenderbufferStorage =
intel_image_target_renderbuffer_storage;
-
- brw->batch.render_cache = _mesa_set_create(brw, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
}
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 76e8923b1b..3befcc271e 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2492,7 +2492,7 @@ intel_update_r8stencil(struct brw_context *brw,
}
}
- if (brw_check_dirty(brw, dst->bo))
+ if (brw_check_dirty(dst->bo))
brw_emit_mi_flush(brw);
src->r8stencil_needs_update = false;
}
--
2.11.0