<p dir="ltr"><br>
On Aug 17, 2015 4:08 PM, "Kenneth Graunke" <<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>> wrote:<br>
><br>
> Previously, ATTR was indexed by VERT_ATTRIB_* slots; at the end of<br>
> compilation, assign_vs_urb_setup() translated those into GRF units,<br>
> and converted ATTR to HW_REGs.<br>
><br>
> This patch moves the transslation earlier, making ATTR work in terms of<br>
> GRF units from the beginning.  assign_vs_urb_setup() simply has to add<br>
> the number of payload registers and push constants to obtain the final<br>
> hardware GRF number.  (We can't do this earlier as those values aren't<br>
> known.)<br>
><br>
> ATTR still supports reg_offset; however, it's simply added to reg.<br>
> It's not clear whether this is valuable or not.<br>
><br>
> Signed-off-by: Kenneth Graunke <<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>><br>
> ---<br>
>  src/mesa/drivers/dri/i965/brw_fs.cpp         | 26 +++------------<br>
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |  2 +-<br>
>  src/mesa/drivers/dri/i965/brw_nir.c          | 48 +++++++++++++++++++++++++---<br>
>  3 files changed, 50 insertions(+), 26 deletions(-)<br>
><br>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
> index efabb52..f556ed6 100644<br>
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
> @@ -1511,10 +1511,9 @@ void<br>
>  fs_visitor::assign_vs_urb_setup()<br>
>  {<br>
>     brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;<br>
> -   int grf, count, slot, channel, attr;<br>
><br>
>     assert(stage == MESA_SHADER_VERTEX);<br>
> -   count = _mesa_bitcount_64(vs_prog_data->inputs_read);<br>
> +   int count = _mesa_bitcount_64(vs_prog_data->inputs_read);<br>
>     if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)<br>
>        count++;<br>
><br>
> @@ -1534,25 +1533,10 @@ fs_visitor::assign_vs_urb_setup()<br>
>     foreach_block_and_inst(block, fs_inst, inst, cfg) {<br>
>        for (int i = 0; i < inst->sources; i++) {<br>
>           if (inst->src[i].file == ATTR) {<br>
> -<br>
> -            if (inst->src[i].reg == VERT_ATTRIB_MAX) {<br>
> -               slot = count - 1;<br>
> -            } else {<br>
> -               /* Attributes come in in a contiguous block, ordered by their<br>
> -                * gl_vert_attrib value.  That means we can compute the slot<br>
> -                * number for an attribute by masking out the enabled<br>
> -                * attributes before it and counting the bits.<br>
> -                */<br>
> -               attr = inst->src[i].reg + inst->src[i].reg_offset / 4;<br>
> -               slot = _mesa_bitcount_64(vs_prog_data->inputs_read &<br>
> -                                        BITFIELD64_MASK(attr));<br>
> -            }<br>
> -<br>
> -            channel = inst->src[i].reg_offset & 3;<br>
> -<br>
> -            grf = payload.num_regs +<br>
> -               prog_data->curb_read_length +<br>
> -               slot * 4 + channel;<br>
> +            int grf = payload.num_regs +<br>
> +                      prog_data->curb_read_length +<br>
> +                      inst->src[i].reg +<br>
> +                      inst->src[i].reg_offset;<br>
><br>
>              inst->src[i].file = HW_REG;<br>
>              inst->src[i].fixed_hw_reg =<br>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
> index 111db8c..0ec8ef5 100644<br>
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
> @@ -53,7 +53,7 @@ fs_reg *<br>
>  fs_visitor::emit_vs_system_value(int location)<br>
>  {<br>
>     fs_reg *reg = new(this->mem_ctx)<br>
> -      fs_reg(ATTR, VERT_ATTRIB_MAX, BRW_REGISTER_TYPE_D);<br>
> +      fs_reg(ATTR, 4*_mesa_bitcount_64(prog->InputsRead), BRW_REGISTER_TYPE_D);<br>
>     brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;<br>
><br>
>     switch (location) {<br>
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c<br>
> index 2e2d02b..e0cf61e 100644<br>
> --- a/src/mesa/drivers/dri/i965/brw_nir.c<br>
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c<br>
> @@ -27,8 +27,36 @@<br>
>  #include "glsl/nir/glsl_to_nir.h"<br>
>  #include "program/prog_to_nir.h"<br>
><br>
> +static bool<br>
> +remap_vs_attrs(nir_block *block, void *closure)<br>
> +{<br>
> +   GLbitfield64 inputs_read = *((GLbitfield64 *) closure);<br>
> +<br>
> +   nir_foreach_instr(block, instr) {<br>
> +      if (instr->type != nir_instr_type_intrinsic)<br>
> +         continue;<br>
> +<br>
> +      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);<br>
> +<br>
> +      /* We set EmitNoIndirect for VS inputs, so there are no indirects. */<br>
> +      assert(intrin->intrinsic != nir_intrinsic_load_input_indirect);<br>
> +<br>
> +      if (intrin->intrinsic == nir_intrinsic_load_input) {<br>
> +         /* Attributes come in a contiguous block, ordered by their<br>
> +          * gl_vert_attrib value.  That means we can compute the slot<br>
> +          * number for an attribute by masking out the enabled attributes<br>
> +          * before it and counting the bits.<br>
> +          */<br>
> +         int attr = intrin->const_index[0];<br>
> +         int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr));</p>
<p dir="ltr">Um... Can't we just set var->data.driver_location to this value? What's the point of it being its own pass?</p>
<p dir="ltr">> +         intrin->const_index[0] = 4 * slot;<br>
> +      }<br>
> +   }<br>
> +   return true;<br>
> +}<br>
> +<br>
>  static void<br>
> -lower_scalar_vs_inputs(nir_shader *nir)<br>
> +lower_scalar_vs_inputs(nir_shader *nir, GLbitfield64 inputs_read)<br>
>  {<br>
>     /* Start with the location of the variable's base. */<br>
>     foreach_list_typed(nir_variable, var, node, &nir->inputs) {<br>
> @@ -40,10 +68,19 @@ lower_scalar_vs_inputs(nir_shader *nir)<br>
>      * type_size_vec4 here.<br>
>      */<br>
>     nir_lower_io(nir, nir_var_shader_in, type_size_vec4);<br>
> +<br>
> +   /* Finally, translate VERT_ATTRIB_* values into the actual registers. */<br>
> +   nir_foreach_overload(nir, overload) {<br>
> +      if (overload->impl) {<br>
> +         nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read);<br>
> +      }<br>
> +   }<br>
>  }<br>
><br>
>  static void<br>
> -brw_lower_nir_io_scalar(nir_shader *nir, gl_shader_stage stage)<br>
> +brw_lower_nir_io_scalar(nir_shader *nir,<br>
> +                        const struct gl_program *prog,<br>
> +                        gl_shader_stage stage)<br>
>  {<br>
>     nir_assign_var_locations_direct_first(nir, &nir->uniforms,<br>
>                                           &nir->num_direct_uniforms,<br>
> @@ -53,7 +90,10 @@ brw_lower_nir_io_scalar(nir_shader *nir, gl_shader_stage stage)<br>
><br>
>     switch (stage) {<br>
>     case MESA_SHADER_VERTEX:<br>
> -      lower_scalar_vs_inputs(nir);<br>
> +      /* Note that we can use prog->InputsRead rather than key->inputs_read,<br>
> +       * since the two identical aside from Gen4-5 edge flag differences.<br>
> +       */<br>
> +      lower_scalar_vs_inputs(nir, prog->InputsRead);<br>
>        break;<br>
>     default:<br>
>        nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_scalar);<br>
> @@ -161,7 +201,7 @@ brw_create_nir(struct brw_context *brw,<br>
>     nir_optimize(nir, is_scalar);<br>
><br>
>     if (is_scalar)<br>
> -      brw_lower_nir_io_scalar(nir, stage);<br>
> +      brw_lower_nir_io_scalar(nir, prog, stage);<br>
>     else<br>
>        brw_lower_nir_io_vec4(nir, stage);<br>
><br>
> --<br>
> 2.5.0<br>
><br>
> _______________________________________________<br>
> mesa-dev mailing list<br>
> <a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
> <a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</p>