[Mesa-dev] [PATCH 4/4] i965: Send the minimal number of STATE_BASE_ADDRESS packets.

Kenneth Graunke kenneth at whitecape.org
Thu Apr 28 00:24:42 UTC 2016


STATE_BASE_ADDRESS stalls the whole pipeline, and the documentation
cautions us to emit it as little as possible for better performance.

We recently put some hacks in BLORP to try and avoid emitting it
if it was already set correctly.  However, this wasn't quite minimal:
if BLORP is the first operation (i.e. glClear()), then it would emit
it, and subsequent draw calls would emit it again.

This caused a small drop in performance in GPUTest Triangle when
switching from Meta to BLORP.

Unlike most packets, STATE_BASE_ADDRESS isn't influenced by GL state:
it needs to be emitted once per batch, before most other commands, or
whenever we change the program cache BO.  It's also valid in both the
3D and compute pipelines, which makes it even more unique.

This patch removes it from the atom mechanism and instead directly
calls it as part of every draw, compute dispatch, or BLORP operation.
We introduce a new flag indicating that STATE_BASE_ADDRESS has already
been emitted this batch, and if so, skip doing it again.  When we make
a new program cache BO, we simply reset the flag, so the next operation
will emit it again.  When we flush/reset the batch, we reset the flag.

This guarantees that we'll emit STATE_BASE_ADDRESS only when we have to.
It's also less code than the old atom mechanism.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_context.h       |  1 +
 src/mesa/drivers/dri/i965/brw_misc_state.c    | 13 ++++---------
 src/mesa/drivers/dri/i965/brw_state.h         |  2 --
 src/mesa/drivers/dri/i965/brw_state_cache.c   |  1 +
 src/mesa/drivers/dri/i965/brw_state_upload.c  | 14 ++------------
 src/mesa/drivers/dri/i965/gen6_blorp.c        |  3 +--
 src/mesa/drivers/dri/i965/gen7_blorp.c        |  3 +--
 src/mesa/drivers/dri/i965/gen8_blorp.c        |  3 +--
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  1 +
 9 files changed, 12 insertions(+), 29 deletions(-)

I believe Topi's going to collect performance data on this patch.
I'll let him reply with that info, and I'll add a summary to the commit
message.

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index e449982..3b90cce 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -619,6 +619,7 @@ struct intel_batchbuffer {
    uint32_t state_batch_offset;
    enum brw_gpu_ring ring;
    bool needs_sol_reset;
+   bool state_base_address_emitted;
 
    struct {
       uint32_t *map_next;
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index c7fccfa..b4b480d 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -1070,6 +1070,9 @@ const struct brw_tracked_state brw_invariant_state = {
 void
 brw_upload_state_base_address(struct brw_context *brw)
 {
+   if (brw->batch.state_base_address_emitted)
+      return;
+
    /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
     * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
     * programmed prior to STATE_BASE_ADDRESS.
@@ -1205,13 +1208,5 @@ brw_upload_state_base_address(struct brw_context *brw)
     */
 
    brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
+   brw->batch.state_base_address_emitted = true;
 }
-
-const struct brw_tracked_state brw_state_base_address = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_PROGRAM_CACHE,
-   },
-   .emit = brw_upload_state_base_address
-};
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index d704029..875cde2 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -66,7 +66,6 @@ extern const struct brw_tracked_state brw_polygon_stipple;
 extern const struct brw_tracked_state brw_recalculate_urb_fence;
 extern const struct brw_tracked_state brw_sf_unit;
 extern const struct brw_tracked_state brw_sf_vp;
-extern const struct brw_tracked_state brw_state_base_address;
 extern const struct brw_tracked_state brw_vs_samplers;
 extern const struct brw_tracked_state brw_tcs_samplers;
 extern const struct brw_tracked_state brw_tes_samplers;
@@ -195,7 +194,6 @@ brw_depthbuffer_format(struct brw_context *brw);
 
 void brw_upload_state_base_address(struct brw_context *brw);
 
-
 /* gen8_depth_state.c */
 void gen8_write_pma_stall_bits(struct brw_context *brw,
                                uint32_t pma_stall_bits);
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index c6aa134..0e98e65 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -199,6 +199,7 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
     * that depend on it (state base address on gen5+, or unit state before).
     */
    brw->ctx.NewDriverState |= BRW_NEW_PROGRAM_CACHE;
+   brw->batch.state_base_address_emitted = false;
 }
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index ed92a85..0b47ebe 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -79,7 +79,6 @@ static const struct brw_tracked_state *gen4_atoms[] =
    /* Command packets:
     */
    &brw_invariant_state,
-   &brw_state_base_address,
 
    &brw_binding_table_pointers,
    &brw_blend_constant_color,
@@ -109,9 +108,6 @@ static const struct brw_tracked_state *gen6_atoms[] =
 
    /* Command packets: */
 
-   /* must do before binding table pointers, cc state ptrs */
-   &brw_state_base_address,
-
    &brw_cc_vp,
    &gen6_viewport_state,	/* must do after *_vp stages */
 
@@ -175,9 +171,6 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
 {
    /* Command packets: */
 
-   /* must do before binding table pointers, cc state ptrs */
-   &brw_state_base_address,
-
    &brw_cc_vp,
    &gen7_sf_clip_viewport,
 
@@ -268,7 +261,6 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
 
 static const struct brw_tracked_state *gen7_compute_atoms[] =
 {
-   &brw_state_base_address,
    &gen7_l3_state,
    &brw_cs_image_surfaces,
    &gen7_cs_push_constants,
@@ -283,9 +275,6 @@ static const struct brw_tracked_state *gen7_compute_atoms[] =
 
 static const struct brw_tracked_state *gen8_render_atoms[] =
 {
-   /* Command packets: */
-   &brw_state_base_address,
-
    &brw_cc_vp,
    &gen8_sf_clip_viewport,
 
@@ -383,7 +372,6 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
 
 static const struct brw_tracked_state *gen8_compute_atoms[] =
 {
-   &brw_state_base_address,
    &gen7_l3_state,
    &brw_cs_image_surfaces,
    &gen7_cs_push_constants,
@@ -847,6 +835,8 @@ brw_upload_pipeline_state(struct brw_context *brw,
    brw_upload_programs(brw, pipeline);
    merge_ctx_state(brw, &state);
 
+   brw_upload_state_base_address(brw);
+
    const struct brw_tracked_state *atoms =
       brw_get_pipeline_atoms(brw, pipeline);
    const int num_atoms = brw->num_atoms[pipeline];
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.c b/src/mesa/drivers/dri/i965/gen6_blorp.c
index f3ac35b..07130b5 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.c
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.c
@@ -985,8 +985,7 @@ gen6_blorp_exec(struct brw_context *brw,
    /* Emit workaround flushes when we switch from drawing to blorping. */
    brw_emit_post_sync_nonzero_flush(brw);
 
-   if (brw_state_base_address.dirty.brw & brw->ctx.NewDriverState)
-      brw_upload_state_base_address(brw);
+   brw_upload_state_base_address(brw);
 
    gen6_emit_3dstate_multisample(brw, params->dst.num_samples);
    gen6_emit_3dstate_sample_mask(brw,
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c b/src/mesa/drivers/dri/i965/gen7_blorp.c
index 93028ff..8dc676b 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.c
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.c
@@ -810,8 +810,7 @@ gen7_blorp_exec(struct brw_context *brw,
    uint32_t wm_push_const_offset = 0;
    uint32_t wm_bind_bo_offset = 0;
 
-   if (brw_state_base_address.dirty.brw & brw->ctx.NewDriverState)
-      brw_upload_state_base_address(brw);
+   brw_upload_state_base_address(brw);
 
    gen6_emit_3dstate_multisample(brw, params->dst.num_samples);
    gen6_emit_3dstate_sample_mask(brw,
diff --git a/src/mesa/drivers/dri/i965/gen8_blorp.c b/src/mesa/drivers/dri/i965/gen8_blorp.c
index 4baa4bb..f1b2374 100644
--- a/src/mesa/drivers/dri/i965/gen8_blorp.c
+++ b/src/mesa/drivers/dri/i965/gen8_blorp.c
@@ -646,8 +646,7 @@ gen8_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params)
 {
    uint32_t wm_bind_bo_offset = 0;
 
-   if (brw_state_base_address.dirty.brw & brw->ctx.NewDriverState)
-      brw_upload_state_base_address(brw);
+   brw_upload_state_base_address(brw);
 
    gen7_blorp_emit_cc_viewport(brw);
    gen7_l3_state.emit(brw);
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index e41f927..ad2b82b 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -73,6 +73,7 @@ intel_batchbuffer_reset(struct brw_context *brw)
    brw->batch.reserved_space = BATCH_RESERVED;
    brw->batch.state_batch_offset = brw->batch.bo->size;
    brw->batch.needs_sol_reset = false;
+   brw->batch.state_base_address_emitted = false;
 
    /* We don't know what ring the new batch will be sent to until we see the
     * first BEGIN_BATCH or BEGIN_BATCH_BLT.  Mark it as unknown.
-- 
2.8.0



More information about the mesa-dev mailing list