[Mesa-dev] [PATCH 51/70] i965: Enable brw-batch dirty tracking
Chris Wilson
chris at chris-wilson.co.uk
Fri Aug 7 13:13:55 PDT 2015
Remove the old hashtable approach and switch over to the inline write
tracking with brw-batch.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
src/mesa/drivers/dri/i965/brw_batch.c | 71 +++++++++++++++++++++++++++-
src/mesa/drivers/dri/i965/brw_batch.h | 9 +---
src/mesa/drivers/dri/i965/brw_context.c | 2 +-
src/mesa/drivers/dri/i965/brw_context.h | 17 ++++++-
src/mesa/drivers/dri/i965/brw_misc_state.c | 4 +-
src/mesa/drivers/dri/i965/gen8_depth_state.c | 2 +-
src/mesa/drivers/dri/i965/intel_blit.c | 3 +-
src/mesa/drivers/dri/i965/intel_fbo.c | 38 ---------------
8 files changed, 93 insertions(+), 53 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_batch.c b/src/mesa/drivers/dri/i965/brw_batch.c
index d1f5828..100466f 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_batch.c
@@ -415,6 +415,7 @@ static void __brw_batch_clear(struct brw_batch *batch)
batch->state = BATCH_SIZE / 4;
batch->aperture = 0;
batch->batch_flags = batch->batch_base_flags;
+ batch->dirty = false;
}
/*
@@ -840,12 +841,80 @@ uint64_t __brw_batch_reloc(struct brw_batch *batch,
batch->needs_pipecontrol_ggtt_wa)
target_bo->exec->flags |= EXEC_OBJECT_NEEDS_GTT;
}
+ batch->dirty = true;
}
return target_bo->offset + target_offset;
}
/*
+ * Mark a bo as being written to by this batch.
+ *
+ * We frequently dirty a buffer and then emit a global cache flush
+ * cleaning all the dirty buffers within a batch. Afterwards, we may
+ * then write to the same buffer, but may not re-emit a relocation and
+ * so we need to notify that the buffer is now dirty again. Normally
+ * we can rely on the relocation marking the write buffers as dirty.
+ *
+ * All caches are flushed by the kernel between batches, so at the end
+ * of each batch we can mark all buffers as clean again. (Before we can
+ * access the buffer, either by the GPU in the next batch or by the CPU
+ * following a set-domain call, that access will be after the flush has
+ * finished.)
+ */
+void brw_bo_mark_dirty(struct brw_batch *batch, struct brw_bo *bo)
+{
+ if (unlikely(bo->batch != batch)) {
+ bo = __brw_batch_lookup_handle(batch, bo->handle);
+ assert(bo);
+ }
+ assert(bo->batch == batch);
+ assert(bo != bo->batch->bo);
+
+ /* We should only be called on objects already in the batch for writing */
+ if (bo->exec == NULL)
+ return;
+
+ assert(bo->read.rq == batch->next_request);
+ assert(bo->write.rq == batch->next_request);
+ assert(bo->domain == DOMAIN_GPU);
+
+ if (bo->dirty)
+ return;
+
+ list_move(&bo->write.link, &batch->next_request->fences);
+ bo->dirty = true;
+ batch->dirty = true;
+}
+
+/*
+ * At the end of each batch and when explicitly flushing caches within
+ * a batch, we can mark all the buffers within that batch as now clean.
+ */
+void brw_batch_clear_dirty(struct brw_batch *batch)
+{
+ struct list_head * const list = &batch->next_request->fences;
+
+ if (!batch->dirty)
+ return;
+
+ list_for_each_entry(struct brw_fence, fence, list, link) {
+ struct brw_bo *bo;
+
+ if (fence->signal != (void*)WRITE_SIGNAL)
+ break;
+
+ bo = container_of(fence, bo, write);
+ if (!bo->dirty)
+ break;
+
+ bo->dirty = false;
+ }
+
+ batch->dirty = false;
+}
+
+/*
* Close the batch by writing all the tail commands (to store register
* values between batches, disable profiling, etc). And then to end it all
* we set MI_BATCH_BUFFER_END.
@@ -1111,8 +1180,6 @@ skip:
__brw_batch_throttle(batch, rq);
__brw_batch_retire(batch);
- brw_batch_clear_dirty(batch);
-
return __brw_batch_reset(batch);
}
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index da88bc2..b3c4252 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -125,6 +125,8 @@ typedef struct brw_batch {
uint64_t max_aperture;
uint64_t rss, peak_rss, vmsize;
+ bool dirty : 1;
+
bool has_softpin : 1;
bool has_llc : 1;
bool has_mmap_wc : 1;
@@ -180,13 +182,6 @@ typedef struct brw_batch {
struct list_head borrowed[1<<BORROWED_BITS];
struct brw_bo *freed_bo;
-
- /**
- * Set of brw_bo* that have been rendered to within this batchbuffer
- * and would need flushing before being used from another cache domain that
- * isn't coherent with it (i.e. the sampler).
- */
- struct set *render_cache;
} brw_batch;
int brw_batch_init(struct brw_batch *batch,
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 117cce4..a51fd81 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -186,7 +186,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
continue;
intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
intel_miptree_resolve_color(brw, tex_obj->mt);
- if (brw_check_dirty(brw, tex_obj->mt->bo))
+ if (brw_check_dirty(tex_obj->mt->bo))
brw_mi_flush(brw, RENDER_RING);
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 001e70f..547b655 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1970,7 +1970,22 @@ void gen7_emit_cs_stall_flush(struct brw_context *brw);
void brw_mi_flush(struct brw_context *brw, enum brw_gpu_ring ring);
-bool brw_check_dirty(struct brw_context *ctx, brw_bo *bo);
+/**
+ * Emits an appropriate flush for a BO if it has been rendered to within the
+ * same batchbuffer as a read that's about to be emitted.
+ *
+ * The GPU has separate, incoherent caches for the render cache and the
+ * sampler cache, along with other caches. Usually data in the different
+ * caches don't interact (e.g. we don't render to our driver-generated
+ * immediate constant data), but for render-to-texture in FBOs we definitely
+ * do. When a batchbuffer is flushed, the kernel will ensure that everything
+ * necessary is flushed before another use of that BO, but for reuse from
+ * different caches within a batchbuffer, it's all our responsibility.
+ */
+static inline bool brw_check_dirty(brw_bo *bo)
+{
+ return bo->dirty;
+}
#ifdef __cplusplus
}
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index b1e8503..38c3003 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -546,9 +546,9 @@ brw_emit_depthbuffer(struct brw_context *brw)
height = stencil_irb->Base.Base.Height;
}
- if (depth_mt && brw_check_dirty(brw, depth_mt->bo))
+ if (depth_mt && brw_check_dirty(depth_mt->bo))
brw_emit_mi_flush(brw);
- if (stencil_mt && brw_check_dirty(brw, stencil_mt->bo))
+ if (stencil_mt && brw_check_dirty(stencil_mt->bo))
brw_emit_mi_flush(brw);
brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index f8ffbeb..85e15c5 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -510,7 +510,7 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
ADVANCE_BATCH();
/* Mark this buffer as needing a TC flush, as we've rendered to it. */
- brw_bo_mark_dirty(&brw->batch, mt->bo);
+ assert(mt->bo->dirty);
if (brw_batch_end(&brw->batch)) {
struct gl_context *ctx = &brw->ctx;
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index a35c8df..2257845 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -560,7 +560,8 @@ intelEmitCopyBlit(struct brw_context *brw,
*
* FIXME: Figure out a way to avoid flushing when not required.
*/
- brw_mi_flush(brw, BLT_RING);
+ if (brw_check_dirty(dst_buffer))
+ brw_mi_flush(brw, BLT_RING);
assert(cpp <= 16);
BR13 = br13_for_cpp(cpp);
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 8344791..e2767ac 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -38,7 +38,6 @@
#include "main/image.h"
#include "main/condrender.h"
#include "util/hash_table.h"
-#include "util/set.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
@@ -1048,40 +1047,6 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
intel_miptree_release(&new_mt);
}
-void
-brw_batch_clear_dirty(brw_batch *batch)
-{
- struct set_entry *entry;
-
- set_foreach(batch->render_cache, entry) {
- _mesa_set_remove(batch->render_cache, entry);
- }
-}
-
-void
-brw_bo_mark_dirty(brw_batch *batch, brw_bo *bo)
-{
- _mesa_set_add(batch->render_cache, bo);
-}
-
-/**
- * Emits an appropriate flush for a BO if it has been rendered to within the
- * same batchbuffer as a read that's about to be emitted.
- *
- * The GPU has separate, incoherent caches for the render cache and the
- * sampler cache, along with other caches. Usually data in the different
- * caches don't interact (e.g. we don't render to our driver-generated
- * immediate constant data), but for render-to-texture in FBOs we definitely
- * do. When a batchbuffer is flushed, the kernel will ensure that everything
- * necessary is flushed before another use of that BO, but for reuse from
- * different caches within a batchbuffer, it's all our responsibility.
- */
-bool
-brw_check_dirty(struct brw_context *brw, brw_bo *bo)
-{
- return _mesa_set_search(brw->batch.render_cache, bo);
-}
-
/**
* Do one-time context initializations related to GL_EXT_framebuffer_object.
* Hook in device driver functions.
@@ -1102,7 +1067,4 @@ intel_fbo_init(struct brw_context *brw)
dd->BlitFramebuffer = gen4_blit_framebuffer;
dd->EGLImageTargetRenderbufferStorage =
intel_image_target_renderbuffer_storage;
-
- brw->batch.render_cache = _mesa_set_create(brw, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
}
--
2.5.0
More information about the mesa-dev
mailing list