[Mesa-dev] [PATCH 8/8] i965/vs: Simplify fs_visitor's ATTR file.

Kenneth Graunke kenneth at whitecape.org
Mon Aug 17 16:07:50 PDT 2015


Previously, ATTR was indexed by VERT_ATTRIB_* slots; at the end of
compilation, assign_vs_urb_setup() translated those into GRF units,
and converted ATTR to HW_REGs.

This patch moves the translation earlier, making ATTR work in terms of
GRF units from the beginning.  assign_vs_urb_setup() simply has to add
the number of payload registers and push constants to obtain the final
hardware GRF number.  (We can't do this earlier as those values aren't
known.)

ATTR still supports reg_offset; however, it's simply added to reg.
It's not clear whether this is valuable or not.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp         | 26 +++------------
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |  2 +-
 src/mesa/drivers/dri/i965/brw_nir.c          | 48 +++++++++++++++++++++++++---
 3 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index efabb52..f556ed6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1511,10 +1511,9 @@ void
 fs_visitor::assign_vs_urb_setup()
 {
    brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
-   int grf, count, slot, channel, attr;
 
    assert(stage == MESA_SHADER_VERTEX);
-   count = _mesa_bitcount_64(vs_prog_data->inputs_read);
+   int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
    if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
       count++;
 
@@ -1534,25 +1533,10 @@ fs_visitor::assign_vs_urb_setup()
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == ATTR) {
-
-            if (inst->src[i].reg == VERT_ATTRIB_MAX) {
-               slot = count - 1;
-            } else {
-               /* Attributes come in in a contiguous block, ordered by their
-                * gl_vert_attrib value.  That means we can compute the slot
-                * number for an attribute by masking out the enabled
-                * attributes before it and counting the bits.
-                */
-               attr = inst->src[i].reg + inst->src[i].reg_offset / 4;
-               slot = _mesa_bitcount_64(vs_prog_data->inputs_read &
-                                        BITFIELD64_MASK(attr));
-            }
-
-            channel = inst->src[i].reg_offset & 3;
-
-            grf = payload.num_regs +
-               prog_data->curb_read_length +
-               slot * 4 + channel;
+            int grf = payload.num_regs +
+                      prog_data->curb_read_length +
+                      inst->src[i].reg +
+                      inst->src[i].reg_offset;
 
             inst->src[i].file = HW_REG;
             inst->src[i].fixed_hw_reg =
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 111db8c..0ec8ef5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -53,7 +53,7 @@ fs_reg *
 fs_visitor::emit_vs_system_value(int location)
 {
    fs_reg *reg = new(this->mem_ctx)
-      fs_reg(ATTR, VERT_ATTRIB_MAX, BRW_REGISTER_TYPE_D);
+      fs_reg(ATTR, 4*_mesa_bitcount_64(prog->InputsRead), BRW_REGISTER_TYPE_D);
    brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
 
    switch (location) {
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 2e2d02b..e0cf61e 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -27,8 +27,36 @@
 #include "glsl/nir/glsl_to_nir.h"
 #include "program/prog_to_nir.h"
 
+static bool
+remap_vs_attrs(nir_block *block, void *closure)
+{
+   GLbitfield64 inputs_read = *((GLbitfield64 *) closure);
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+      /* We set EmitNoIndirect for VS inputs, so there are no indirects. */
+      assert(intrin->intrinsic != nir_intrinsic_load_input_indirect);
+
+      if (intrin->intrinsic == nir_intrinsic_load_input) {
+         /* Attributes come in a contiguous block, ordered by their
+          * gl_vert_attrib value.  That means we can compute the slot
+          * number for an attribute by masking out the enabled attributes
+          * before it and counting the bits.
+          */
+         int attr = intrin->const_index[0];
+         int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr));
+         intrin->const_index[0] = 4 * slot;
+      }
+   }
+   return true;
+}
+
 static void
-lower_scalar_vs_inputs(nir_shader *nir)
+lower_scalar_vs_inputs(nir_shader *nir, GLbitfield64 inputs_read)
 {
    /* Start with the location of the variable's base. */
    foreach_list_typed(nir_variable, var, node, &nir->inputs) {
@@ -40,10 +68,19 @@ lower_scalar_vs_inputs(nir_shader *nir)
     * type_size_vec4 here.
     */
    nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
+
+   /* Finally, translate VERT_ATTRIB_* values into the actual registers. */
+   nir_foreach_overload(nir, overload) {
+      if (overload->impl) {
+         nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read);
+      }
+   }
 }
 
 static void
-brw_lower_nir_io_scalar(nir_shader *nir, gl_shader_stage stage)
+brw_lower_nir_io_scalar(nir_shader *nir,
+                        const struct gl_program *prog,
+                        gl_shader_stage stage)
 {
    nir_assign_var_locations_direct_first(nir, &nir->uniforms,
                                          &nir->num_direct_uniforms,
@@ -53,7 +90,10 @@ brw_lower_nir_io_scalar(nir_shader *nir, gl_shader_stage stage)
 
    switch (stage) {
    case MESA_SHADER_VERTEX:
-      lower_scalar_vs_inputs(nir);
+      /* Note that we can use prog->InputsRead rather than key->inputs_read,
+       * since the two are identical aside from Gen4-5 edge flag differences.
+       */
+      lower_scalar_vs_inputs(nir, prog->InputsRead);
       break;
    default:
       nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_scalar);
@@ -161,7 +201,7 @@ brw_create_nir(struct brw_context *brw,
    nir_optimize(nir, is_scalar);
 
    if (is_scalar)
-      brw_lower_nir_io_scalar(nir, stage);
+      brw_lower_nir_io_scalar(nir, prog, stage);
    else
       brw_lower_nir_io_vec4(nir, stage);
 
-- 
2.5.0



More information about the mesa-dev mailing list