<p dir="ltr"><br>
On Dec 11, 2015 1:24 PM, "Kenneth Graunke" <<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>> wrote:<br>
><br>
> My tessellation branch has two additional remap functions.  I don't want<br>
> to replicate this logic there.<br>
><br>
> v2: Handle inputs/outputs separately (suggested by Jason Ekstrand).<br>
><br>
> Signed-off-by: Kenneth Graunke <<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>><br>
> ---<br>
>  src/mesa/drivers/dri/i965/brw_nir.c | 93 +++++++++++++++++++++++++++----------<br>
>  1 file changed, 68 insertions(+), 25 deletions(-)<br>
><br>
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c<br>
> index 14ad172..afb1f6d 100644<br>
> --- a/src/mesa/drivers/dri/i965/brw_nir.c<br>
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c<br>
> @@ -27,15 +27,41 @@<br>
>  #include "glsl/nir/nir_builder.h"<br>
>  #include "program/prog_to_nir.h"<br>
><br>
> -struct remap_vs_attrs_state {<br>
> +static bool<br>
> +is_input(nir_intrinsic_instr *intrin)<br>
> +{<br>
> +   return intrin->intrinsic == nir_intrinsic_load_input ||<br>
> +          intrin->intrinsic == nir_intrinsic_load_per_vertex_input;<br>
> +}<br>
> +<br>
> +static bool<br>
> +is_output(nir_intrinsic_instr *intrin)<br>
> +{<br>
> +   return intrin->intrinsic == nir_intrinsic_load_output ||<br>
> +          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||<br>
> +          intrin->intrinsic == nir_intrinsic_store_output ||<br>
> +          intrin->intrinsic == nir_intrinsic_store_per_vertex_output;<br>
> +}<br>
> +<br>
> +/**<br>
> + * In many cases, we just add the base and offset together, so there's no<br>
> + * reason to keep them separate.  Sometimes, combining them is essential:<br>
> + * if a shader only accesses part of a compound variable (such as a matrix<br>
> + * or array), the variable's base may not actually exist in the VUE map.<br>
> + *<br>
> + * This pass adds constant offsets to instr->const_index[0], and resets<br>
> + * the offset source to 0.  Non-constant offsets remain unchanged.<br>
> + */<br>
> +struct add_const_offset_to_base_params {<br>
>     nir_builder b;<br>
> -   uint64_t inputs_read;<br>
> +   nir_variable_mode mode;<br>
>  };<br>
><br>
>  static bool<br>
> -remap_vs_attrs(nir_block *block, void *void_state)<br>
> +add_const_offset_to_base(nir_block *block, void *closure)<br>
>  {<br>
> -   struct remap_vs_attrs_state *state = void_state;<br>
> +   struct add_const_offset_to_base_params *params = closure;<br>
> +   nir_builder *b = &params->b;<br>
><br>
>     nir_foreach_instr_safe(block, instr) {<br>
>        if (instr->type != nir_instr_type_intrinsic)<br>
> @@ -43,30 +69,44 @@ remap_vs_attrs(nir_block *block, void *void_state)<br>
><br>
>        nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);<br>
><br>
> +      if ((params->mode == nir_var_shader_in && is_input(intrin)) ||<br>
> +          (params->mode == nir_var_shader_out && is_output(intrin))) {<br>
> +         nir_src *offset = nir_get_io_offset_src(intrin);<br>
> +         nir_const_value *const_offset = nir_src_as_const_value(*offset);<br>
> +<br>
> +         if (const_offset) {<br>
> +            intrin->const_index[0] += const_offset->u[0];<br>
> +            b->cursor = nir_before_instr(&intrin->instr);<br>
> +            nir_instr_rewrite_src(&intrin->instr, offset,<br>
> +                                  nir_src_for_ssa(nir_imm_int(b, 0)));<br>
> +         }<br>
> +      }<br>
> +   }<br>
> +   return true;<br>
> +<br>
> +}<br>
> +<br>
> +static bool<br>
> +remap_vs_attrs(nir_block *block, void *closure)<br>
> +{<br>
> +   GLbitfield64 inputs_read = *((GLbitfield64 *) closure);<br>
> +<br>
> +   nir_foreach_instr(block, instr) {<br>
> +      if (instr->type != nir_instr_type_intrinsic)<br>
> +         continue;<br>
> +<br>
> +      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);<br>
> +<br>
>        if (intrin->intrinsic == nir_intrinsic_load_input) {<br>
>           /* Attributes come in a contiguous block, ordered by their<br>
>            * gl_vert_attrib value.  That means we can compute the slot<br>
>            * number for an attribute by masking out the enabled attributes<br>
>            * before it and counting the bits.<br>
>            */<br>
> -         nir_const_value *const_offset = nir_src_as_const_value(intrin->src[0]);<br>
> -<br>
> -         /* We set EmitNoIndirect for VS inputs, so there are no indirects. */<br>
> -         assert(const_offset);<br>
> -<br>
> -         int attr = intrin->const_index[0] + const_offset->u[0];<br>
> -         int slot = _mesa_bitcount_64(state->inputs_read &<br>
> -                                      BITFIELD64_MASK(attr));<br>
> +         int attr = intrin->const_index[0];<br>
> +         int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr));</p>
<p dir="ltr">I would like a comment in here somewhere saying why it's OK to leave non-const offsets alone.</p>
<p dir="ltr">><br>
> -         /* The NIR -> FS pass will just add the base and offset together, so<br>
> -          * there's no reason to keep them separate.  Just put it all in<br>
> -          * const_index[0] and set the offset src[0] to load_const(0).<br>
> -          */<br>
>           intrin->const_index[0] = 4 * slot;<br>
> -<br>
> -         state->b.cursor = nir_before_instr(&intrin->instr);<br>
> -         nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],<br>
> -                               nir_src_for_ssa(nir_imm_int(&state->b, 0)));<br>
>        }<br>
>     }<br>
>     return true;<br>
> @@ -77,6 +117,10 @@ brw_nir_lower_inputs(nir_shader *nir,<br>
>                       const struct brw_device_info *devinfo,<br>
>                       bool is_scalar)<br>
>  {<br>
> +   struct add_const_offset_to_base_params params = {<br>
> +      .mode = nir_var_shader_in<br>
> +   };<br>
> +<br>
>     switch (nir->stage) {<br>
>     case MESA_SHADER_VERTEX:<br>
>        /* Start with the location of the variable's base. */<br>
> @@ -97,17 +141,16 @@ brw_nir_lower_inputs(nir_shader *nir,<br>
>            * key->inputs_read since the two are identical aside from Gen4-5<br>
>            * edge flag differences.<br>
>            */<br>
> -         struct remap_vs_attrs_state remap_state = {<br>
> -            .inputs_read = nir->info.inputs_read,<br>
> -         };<br>
> +         GLbitfield64 inputs_read = nir->info.inputs_read;<br>
><br>
>           /* This pass needs actual constants */<br>
>           nir_opt_constant_folding(nir);<br>
><br>
>           nir_foreach_overload(nir, overload) {<br>
>              if (overload->impl) {<br>
> -               nir_builder_init(&remap_state.b, overload->impl);<br>
> -               nir_foreach_block(overload->impl, remap_vs_attrs, &remap_state);<br>
> +               nir_builder_init(&params.b, overload->impl);<br>
> +               nir_foreach_block(overload->impl, add_const_offset_to_base, &params);<br>
> +               nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read);<br>
>              }<br>
>           }<br>
>        }<br>
> --<br>
> 2.6.3<br>
><br>
> _______________________________________________<br>
> mesa-dev mailing list<br>
> <a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
> <a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</p>