[Mesa-dev] [PATCH 49/51] i965: Enable brw-batch dirty tracking

Chris Wilson chris at chris-wilson.co.uk
Tue Jan 10 21:24:12 UTC 2017


Remove the old hashtable approach and switch over to the inline write
tracking with brw-batch.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 src/mesa/drivers/dri/i965/brw_batch.c         | 70 ++++++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_batch.h         | 10 +---
 src/mesa/drivers/dri/i965/brw_context.c       | 24 +++++----
 src/mesa/drivers/dri/i965/brw_context.h       | 17 ++++++-
 src/mesa/drivers/dri/i965/brw_misc_state.c    |  4 +-
 src/mesa/drivers/dri/i965/brw_pipe_control.c  |  2 +-
 src/mesa/drivers/dri/i965/gen8_depth_state.c  |  2 +-
 src/mesa/drivers/dri/i965/intel_blit.c        |  3 +-
 src/mesa/drivers/dri/i965/intel_fbo.c         | 38 ---------------
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c |  2 +-
 10 files changed, 108 insertions(+), 64 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.c b/src/mesa/drivers/dri/i965/brw_batch.c
index b257d000f8..515a81bf89 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_batch.c
@@ -990,6 +990,8 @@ uint64_t __brw_batch_reloc(struct brw_batch *batch,
              batch->needs_pipecontrol_ggtt_wa)
             target_bo->exec->flags |= EXEC_OBJECT_NEEDS_GTT;
       }
+
+      batch->flags |= BATCH_DIRTY;
    }
 
    return __brw_reloc_address(target_bo, target_offset);
@@ -1056,6 +1058,72 @@ static uint32_t __brw_batch_emit_seqno(struct brw_batch *batch,
 }
 
 /*
+ * Mark a bo as being written to by this batch.
+ *
+ * We frequently dirty a buffer and then emit a global cache flush
+ * cleaning all the dirty buffers within a batch. Afterwards, we may
+ * then write to the same buffer, but may not re-emit a relocation and
+ * so we need to notify that the buffer is now dirty again. Normally
+ * we can rely on the relocation marking the write buffers as dirty.
+ *
+ * All caches are flushed by the kernel between batches, so at the end
+ * of each batch we can mark all buffers as clean again. (Before we can
+ * access the buffer, either by the GPU in the next batch or by the CPU
+ * following a set-domain call, that access will be after the flush has
+ * finished.)
+ */
+void brw_bo_mark_dirty(struct brw_batch *batch, struct brw_bo *bo)
+{
+   if (unlikely(bo->batch != batch)) {
+      bo = __brw_batch_lookup_handle(batch, bo->handle);
+      assert(bo);
+   }
+   assert(bo->batch == batch);
+   assert(bo != bo->batch->bo);
+
+   /* We should only be called on objects already in the batch for writing */
+   if (bo->exec == NULL)
+      return;
+
+   assert(brw_fence_get_request(&bo->read[batch->ring]) == batch->next_request);
+   assert(brw_fence_get_request(&bo->write) == batch->next_request);
+
+   if (bo->dirty)
+      return;
+
+   list_move(&bo->write.link, &batch->next_request->fences);
+   bo->dirty = true;
+   batch->flags |= BATCH_DIRTY;
+}
+
+/*
+ * At the end of each batch and when explicitly flushing caches within
+ * a batch, we can mark all the buffers within that batch as now clean.
+ */
+void brw_batch_clear_dirty(struct brw_batch *batch)
+{
+   struct list_head * const list = &batch->next_request->fences;
+
+   if (!(batch->flags & BATCH_DIRTY))
+      return;
+
+   list_for_each_entry(struct brw_fence, fence, list, link) {
+      struct brw_bo *bo;
+
+      if (brw_fence_get_signal(fence) != WRITE_SIGNAL)
+         break;
+
+      bo = container_of(fence, bo, write);
+      if (!bo->dirty)
+         break;
+
+      bo->dirty = false;
+   }
+
+   batch->flags &= ~BATCH_DIRTY;
+}
+
+/*
  * Close the batch by writing all the tail commands (to store register
  * values between batches, disable profiling, etc). And then to end it all
  * we set MI_BATCH_BUFFER_END.
@@ -1397,8 +1465,6 @@ skip:
    __brw_batch_throttle(batch, rq);
    __brw_batch_retire(batch);
 
-   brw_batch_clear_dirty(batch);
-
    return __brw_batch_next(batch);
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index 264868f253..074a13f550 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -107,7 +107,8 @@ typedef struct brw_batch {
    uint32_t *tail;
 
    uint32_t flags;
-#define BATCH_HAS_STATE_BASE    (1 << 31)
+#define BATCH_DIRTY		(1 << 31)
+#define BATCH_HAS_STATE_BASE    (1 << 30)
    uint32_t base_flags;
 
    enum brw_gpu_ring ring;
@@ -185,13 +186,6 @@ typedef struct brw_batch {
    struct list_head borrowed[1<<BORROWED_BITS];
 
    struct brw_bo *freed_bo;
-
-   /**
-    * Set of brw_bo* that have been rendered to within this batchbuffer
-    * and would need flushing before being used from another cache domain that
-    * isn't coherent with it (i.e. the sampler).
-    */
-   struct set *render_cache;
 } brw_batch;
 
 int brw_batch_init(struct brw_batch *batch,
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 15e467b00a..488d76be8e 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -228,6 +228,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
    struct brw_context *brw = brw_context(ctx);
    struct intel_texture_object *tex_obj;
    struct intel_renderbuffer *depth_irb;
+   bool flush = false;
 
    if (ctx->swrast_context)
       _swrast_InvalidateState(ctx, new_state);
@@ -263,8 +264,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
       const int flags = intel_texture_view_requires_resolve(brw, tex_obj) ?
                            0 : INTEL_MIPTREE_IGNORE_CCS_E;
       intel_miptree_all_slices_resolve_color(brw, tex_obj->mt, flags);
-      if (brw_check_dirty(brw, tex_obj->mt->bo))
-         brw_mi_flush(brw, RENDER_RING);
+      flush |= brw_check_dirty(tex_obj->mt->bo);
 
       if (tex_obj->base.StencilSampling ||
           tex_obj->mt->format == MESA_FORMAT_S_UINT8) {
@@ -283,8 +283,14 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
             struct gl_image_unit *u =
                &ctx->ImageUnits[shader->Program->sh.ImageUnits[j]];
             tex_obj = intel_texture_object(u->TexObj);
+            if (!tex_obj)
+               continue;
 
-            if (tex_obj && tex_obj->mt) {
+            if (tex_obj->base.Target == GL_TEXTURE_BUFFER) {
+               struct intel_buffer_object *intel_obj =
+                  intel_buffer_object(tex_obj->base.BufferObject);
+               flush |= brw_check_dirty(intel_obj->buffer);
+            } else if (tex_obj->mt) {
                /* Access to images is implemented using indirect messages
                 * against data port. Normal render target write understands
                 * lossless compression but unfortunately the typed/untyped
@@ -300,8 +306,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
                              "off lossless compression");
                }
 
-               if (brw_check_dirty(brw, tex_obj->mt->bo))
-                  brw_mi_flush(brw, RENDER_RING);
+               flush |= brw_check_dirty(tex_obj->mt->bo);
             }
          }
       }
@@ -321,8 +326,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
              intel_miptree_resolve_color(
                 brw, irb->mt, irb->mt_level, irb->mt_layer, irb->layer_count,
                 INTEL_MIPTREE_IGNORE_CCS_E))
-            if (brw_check_dirty(brw, irb->mt->bo))
-               brw_emit_mi_flush(brw);
+            flush |= brw_check_dirty(irb->mt->bo);
       }
    }
 
@@ -353,11 +357,13 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
           */
          assert(!intel_miptree_is_lossless_compressed(brw, mt));
          intel_miptree_all_slices_resolve_color(brw, mt, 0);
-         if (brw_check_dirty(brw, mt->bo))
-            brw_mi_flush(brw, RENDER_RING);
+         flush |= brw_check_dirty(mt->bo);
       }
    }
 
+   if (flush)
+      brw_mi_flush(brw, RENDER_RING);
+
    _mesa_lock_context_textures(ctx);
 
    if (new_state & _NEW_BUFFERS) {
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 5e2df95508..cd31b730f5 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1680,7 +1680,22 @@ void gen7_emit_cs_stall_flush(struct brw_context *brw);
 void brw_mi_flush(struct brw_context *brw, enum brw_gpu_ring ring);
 void brw_pipe_control_flush(struct brw_context *brw, unsigned flags);
 
-bool brw_check_dirty(struct brw_context *ctx, brw_bo *bo);
+/**
+ * Emits an appropriate flush for a BO if it has been rendered to within the
+ * same batchbuffer as a read that's about to be emitted.
+ *
+ * The GPU has separate, incoherent caches for the render cache and the
+ * sampler cache, along with other caches.  Usually data in the different
+ * caches don't interact (e.g. we don't render to our driver-generated
+ * immediate constant data), but for render-to-texture in FBOs we definitely
+ * do.  When a batchbuffer is flushed, the kernel will ensure that everything
+ * necessary is flushed before another use of that BO, but for reuse from
+ * different caches within a batchbuffer, it's all our responsibility.
+ */
+static inline bool brw_check_dirty(brw_bo *bo)
+{
+   return bo->dirty;
+}
 
 /* brw_queryformat.c */
 void brw_query_internal_format(struct gl_context *ctx, GLenum target,
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index f4ea7449f6..9197057e49 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -551,9 +551,9 @@ brw_emit_depthbuffer(struct brw_context *brw)
       height = stencil_irb->Base.Base.Height;
    }
 
-   if (depth_mt && brw_check_dirty(brw, depth_mt->bo))
+   if (depth_mt && brw_check_dirty(depth_mt->bo))
       brw_emit_mi_flush(brw);
-   if (stencil_mt && brw_check_dirty(brw, stencil_mt->bo))
+   if (stencil_mt && brw_check_dirty(stencil_mt->bo))
       brw_emit_mi_flush(brw);
 
    brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index aab3bf141a..40102f9aa6 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -363,7 +363,7 @@ brw_emit_post_sync_nonzero_flush(struct brw_context *brw)
 void
 brw_emit_mi_flush(struct brw_context *brw)
 {
-   if (brw_batch_count(&brw->batch) == 0)
+   if (!(brw->batch.flags & BATCH_DIRTY))
       return;
 
    if (brw->batch.ring == BLT_RING) {
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index 77424f593a..30f0f11432 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -513,7 +513,7 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
    ADVANCE_BATCH();
 
    /* Mark this buffer as needing a TC flush, as we've rendered to it. */
-   brw_bo_mark_dirty(&brw->batch, mt->bo);
+   assert(mt->bo->dirty);
 
    brw_batch_end(&brw->batch);
    brw_batch_maybe_flush(&brw->batch);
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 9e961b03a2..825643cf0c 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -645,7 +645,8 @@ intelEmitCopyBlit(struct brw_context *brw,
        *
        * FIXME: Figure out a way to avoid flushing when not required.
        */
-      brw_mi_flush(brw, BLT_RING);
+      if (brw_check_dirty(dst_buffer))
+         brw_mi_flush(brw, BLT_RING);
 
       assert(cpp <= 16);
       BR13 = br13_for_cpp(cpp);
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 8f8f7e5ad5..47f9e18a35 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -35,7 +35,6 @@
 #include "main/image.h"
 #include "main/condrender.h"
 #include "util/hash_table.h"
-#include "util/set.h"
 
 #include "swrast/swrast.h"
 #include "drivers/common/meta.h"
@@ -1049,40 +1048,6 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
    intel_miptree_release(&new_mt);
 }
 
-void
-brw_batch_clear_dirty(brw_batch *batch)
-{
-   struct set_entry *entry;
-
-   set_foreach(batch->render_cache, entry) {
-      _mesa_set_remove(batch->render_cache, entry);
-   }
-}
-
-void
-brw_bo_mark_dirty(brw_batch *batch, brw_bo *bo)
-{
-   _mesa_set_add(batch->render_cache, bo);
-}
-
-/**
- * Emits an appropriate flush for a BO if it has been rendered to within the
- * same batchbuffer as a read that's about to be emitted.
- *
- * The GPU has separate, incoherent caches for the render cache and the
- * sampler cache, along with other caches.  Usually data in the different
- * caches don't interact (e.g. we don't render to our driver-generated
- * immediate constant data), but for render-to-texture in FBOs we definitely
- * do.  When a batchbuffer is flushed, the kernel will ensure that everything
- * necessary is flushed before another use of that BO, but for reuse from
- * different caches within a batchbuffer, it's all our responsibility.
- */
-bool
-brw_check_dirty(struct brw_context *brw, brw_bo *bo)
-{
-   return _mesa_set_search(brw->batch.render_cache, bo);
-}
-
 /**
  * Do one-time context initializations related to GL_EXT_framebuffer_object.
  * Hook in device driver functions.
@@ -1102,7 +1067,4 @@ intel_fbo_init(struct brw_context *brw)
       dd->BlitFramebuffer = gen4_blit_framebuffer;
    dd->EGLImageTargetRenderbufferStorage =
       intel_image_target_renderbuffer_storage;
-
-   brw->batch.render_cache = _mesa_set_create(brw, _mesa_hash_pointer,
-                                              _mesa_key_pointer_equal);
 }
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 76e8923b1b..3befcc271e 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2492,7 +2492,7 @@ intel_update_r8stencil(struct brw_context *brw,
       }
    }
 
-   if (brw_check_dirty(brw, dst->bo))
+   if (brw_check_dirty(dst->bo))
       brw_emit_mi_flush(brw);
    src->r8stencil_needs_update = false;
 }
-- 
2.11.0



More information about the mesa-dev mailing list