[Mesa-dev] [PATCH 09/11] i965: Push UBO data, but don't use it just yet.

Kenneth Graunke kenneth at whitecape.org
Fri Jul 7 00:22:18 UTC 2017


This patch starts uploading UBO data via 3DSTATE_CONSTANT_* packets,
and updates the compiler to know that there's extra payload data, so
things continue working.  However, it still issues pull loads for all
data.  I wanted to separate the two aspects for greater bisectability.
---
 src/intel/compiler/brw_fs.cpp                    |  9 +++-
 src/intel/compiler/brw_vec4.cpp                  |  3 ++
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  2 +
 src/mesa/drivers/dri/i965/genX_state_upload.c    | 69 +++++++++++++++++++++---
 4 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 43b6e342043..cb8d093d00b 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1383,7 +1383,14 @@ fs_visitor::emit_gs_thread_end()
 void
 fs_visitor::assign_curb_setup()
 {
-   prog_data->curb_read_length = ALIGN(stage_prog_data->nr_params, 8) / 8;
+   unsigned uniform_push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8);
+
+   unsigned ubo_push_length = 0;
+   for (int i = 0; i < 4; i++) {
+      ubo_push_length += stage_prog_data->ubo_ranges[i].length;
+   }
+
+   prog_data->curb_read_length = uniform_push_length + ubo_push_length;
 
    /* Map the offsets in the UNIFORM file to fixed HW regs. */
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 3de7d931dde..410922c62b2 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -1776,6 +1776,9 @@ vec4_visitor::setup_uniforms(int reg)
       reg += ALIGN(uniforms, 2) / 2;
    }
 
+   for (int i = 0; i < 4; i++)
+      reg += stage_prog_data->ubo_ranges[i].length;
+
    stage_prog_data->nr_params = this->uniforms * 4;
 
    prog_data->base.curb_read_length =
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 3615c1805ec..9293bea992c 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1444,6 +1444,8 @@ brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
       }
    }
 
+   stage_state->push_constants_dirty = true;
+
    if (prog->info.num_ubos || prog->info.num_ssbos)
       brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
 }
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 264e999b86a..c5d38e641d7 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -2743,6 +2743,8 @@ UNUSED static const uint32_t push_constant_opcodes[] = {
 static void
 genX(upload_push_constant_packets)(struct brw_context *brw)
 {
+   struct gl_context *ctx = &brw->ctx;
+
    UNUSED uint32_t mocs = GEN_GEN < 8 ? GEN7_MOCS_L3 : 0;
 
    struct brw_stage_state *stage_states[] = {
@@ -2759,19 +2761,71 @@ genX(upload_push_constant_packets)(struct brw_context *brw)
 
    for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
       struct brw_stage_state *stage_state = stage_states[stage];
-      bool active = stage_state->prog_data && stage_state->push_const_size > 0;
+      struct gl_program *prog = ctx->_Shader->CurrentProgram[stage];
 
       if (!stage_state->push_constants_dirty)
          continue;
 
       brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), pkt) {
          pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
-         if (active) {
+         if (stage_state->prog_data) {
 #if GEN_GEN >= 8 || GEN_IS_HASWELL
-            pkt.ConstantBody.ReadLength[2] = stage_state->push_const_size;
-            pkt.ConstantBody.Buffer[2] =
-               render_ro_bo(stage_state->push_const_bo,
-                            stage_state->push_const_offset);
+            /* The Skylake PRM contains the following restriction:
+             *
+             *    "The driver must ensure The following case does not occur
+             *     without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
+             *     buffer 3 read length equal to zero committed followed by a
+             *     3DSTATE_CONSTANT_* with buffer 0 read length not equal to
+             *     zero committed."
+             *
+             * To avoid this, we program the buffers in the highest slots.
+             * This way, slot 0 is only used if slot 3 is also used.
+             */
+            int n = 3;
+
+            for (int i = 3; i >= 0; i--) {
+               const struct brw_ubo_range *range =
+                  &stage_state->prog_data->ubo_ranges[i];
+
+               if (range->length == 0)
+                  continue;
+
+               const struct gl_uniform_block *block =
+                  prog->sh.UniformBlocks[range->block];
+               const struct gl_uniform_buffer_binding *binding =
+                  &ctx->UniformBufferBindings[block->Binding];
+
+               if (binding->BufferObject == ctx->Shared->NullBufferObj) {
+                  static unsigned msg_id = 0;
+                  _mesa_gl_debug(ctx, &msg_id, MESA_DEBUG_SOURCE_API,
+                                 MESA_DEBUG_TYPE_UNDEFINED,
+                                 MESA_DEBUG_SEVERITY_HIGH,
+                                 "UBO %d unbound, %s shader uniform data "
+                                 "will be undefined.",
+                                 range->block,
+                                 _mesa_shader_stage_to_string(stage));
+                  continue;
+               }
+
+               assert(binding->Offset % 32 == 0);
+
+               struct brw_bo *bo = intel_bufferobj_buffer(brw,
+                  intel_buffer_object(binding->BufferObject),
+                  binding->Offset, range->length * 32);
+
+               pkt.ConstantBody.ReadLength[n] = range->length;
+               pkt.ConstantBody.Buffer[n] =
+                  render_ro_bo(bo, range->start * 32 + binding->Offset);
+               n--;
+            }
+
+            if (stage_state->push_const_size > 0) {
+               assert(n >= 0);
+               pkt.ConstantBody.ReadLength[n] = stage_state->push_const_size;
+               pkt.ConstantBody.Buffer[n] =
+                  render_ro_bo(stage_state->push_const_bo,
+                               stage_state->push_const_offset);
+            }
 #else
             pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size;
             pkt.ConstantBody.Buffer[0].offset =
@@ -3465,7 +3519,8 @@ genX(upload_ps)(struct brw_context *brw)
       ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
 #endif
 
-      if (prog_data->base.nr_params > 0)
+      if (prog_data->base.nr_params > 0 ||
+          prog_data->base.ubo_ranges[0].length > 0)
          ps.PushConstantEnable = true;
 
 #if GEN_GEN < 8
-- 
2.13.2



More information about the mesa-dev mailing list