On Dec 11, 2015 1:24 PM, "Kenneth Graunke" <<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>> wrote: > > My tessellation branch has two additional remap functions. I don't want > to replicate this logic there. > > v2: Handle inputs/outputs separately (suggested by Jason Ekstrand). > > Signed-off-by: Kenneth Graunke <<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>> > --- > src/mesa/drivers/dri/i965/brw_nir.c | 93 +++++++++++++++++++++++++++---------- > 1 file changed, 68 insertions(+), 25 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c > index 14ad172..afb1f6d 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir.c > +++ b/src/mesa/drivers/dri/i965/brw_nir.c > @@ -27,15 +27,41 @@ > #include "glsl/nir/nir_builder.h" > #include "program/prog_to_nir.h" > > -struct remap_vs_attrs_state { > +static bool > +is_input(nir_intrinsic_instr *intrin) > +{ > + return intrin->intrinsic == nir_intrinsic_load_input || > + intrin->intrinsic == nir_intrinsic_load_per_vertex_input; > +} > + > +static bool > +is_output(nir_intrinsic_instr *intrin) > +{ > + return intrin->intrinsic == nir_intrinsic_load_output || > + intrin->intrinsic == nir_intrinsic_load_per_vertex_output || > + intrin->intrinsic == nir_intrinsic_store_output || > + intrin->intrinsic == nir_intrinsic_store_per_vertex_output; > +} > + > +/** > + * In many cases, we just add the base and offset together, so there's no > + * reason to keep them separate. Sometimes, combining them is essential: > + * if a shader only accesses part of a compound variable (such as a matrix > + * or array), the variable's base may not actually exist in the VUE map. > + * > + * This pass adds constant offsets to instr->const_index[0], and resets > + * the offset source to 0. Non-constant offsets remain unchanged. > + */ > +struct add_const_offset_to_base_params { > nir_builder b; > - uint64_t inputs_read; > + nir_variable_mode mode; > }; > > static bool > -remap_vs_attrs(nir_block *block, void *void_state) > +add_const_offset_to_base(nir_block *block, void *closure) > { > - struct remap_vs_attrs_state *state = void_state; > + struct add_const_offset_to_base_params *params = closure; > + nir_builder *b = &params->b; > > nir_foreach_instr_safe(block, instr) { > if (instr->type != nir_instr_type_intrinsic) > @@ -43,30 +69,44 @@ remap_vs_attrs(nir_block *block, void *void_state) > > nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); > > + if ((params->mode == nir_var_shader_in && is_input(intrin)) || > + (params->mode == nir_var_shader_out && is_output(intrin))) { > + nir_src *offset = nir_get_io_offset_src(intrin); > + nir_const_value *const_offset = nir_src_as_const_value(*offset); > + > + if (const_offset) { > + intrin->const_index[0] += const_offset->u[0]; > + b->cursor = nir_before_instr(&intrin->instr); > + nir_instr_rewrite_src(&intrin->instr, offset, > + nir_src_for_ssa(nir_imm_int(b, 0))); > + } > + } > + } > + return true; > + > +} > + > +static bool > +remap_vs_attrs(nir_block *block, void *closure) > +{ > + GLbitfield64 inputs_read = *((GLbitfield64 *) closure); > + > + nir_foreach_instr(block, instr) { > + if (instr->type != nir_instr_type_intrinsic) > + continue; > + > + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); > + > if (intrin->intrinsic == nir_intrinsic_load_input) { > /* Attributes come in a contiguous block, ordered by their > * gl_vert_attrib value. That means we can compute the slot > * number for an attribute by masking out the enabled attributes > * before it and counting the bits. > */ > - nir_const_value *const_offset = nir_src_as_const_value(intrin->src[0]); > - > - /* We set EmitNoIndirect for VS inputs, so there are no indirects. */ > - assert(const_offset); > - > - int attr = intrin->const_index[0] + const_offset->u[0]; > - int slot = _mesa_bitcount_64(state->inputs_read & > - BITFIELD64_MASK(attr)); > + int attr = intrin->const_index[0]; > + int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr)); I would like a comment in here somewhere saying why it's OK to leave non-const offsets alone. > > - /* The NIR -> FS pass will just add the base and offset together, so > - * there's no reason to keep them separate. Just put it all in > - * const_index[0] and set the offset src[0] to load_const(0). > - */ > intrin->const_index[0] = 4 * slot; > - > - state->b.cursor = nir_before_instr(&intrin->instr); > - nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], > - nir_src_for_ssa(nir_imm_int(&state->b, 0))); > } > } > return true; > @@ -77,6 +117,10 @@ brw_nir_lower_inputs(nir_shader *nir, > const struct brw_device_info *devinfo, > bool is_scalar) > { > + struct add_const_offset_to_base_params params = { > + .mode = nir_var_shader_in > + }; > + > switch (nir->stage) { > case MESA_SHADER_VERTEX: > /* Start with the location of the variable's base. */ > @@ -97,17 +141,16 @@ brw_nir_lower_inputs(nir_shader *nir, > * key->inputs_read since the two are identical aside from Gen4-5 > * edge flag differences. > */ > - struct remap_vs_attrs_state remap_state = { > - .inputs_read = nir->info.inputs_read, > - }; > + GLbitfield64 inputs_read = nir->info.inputs_read; > > /* This pass needs actual constants */ > nir_opt_constant_folding(nir); > > nir_foreach_overload(nir, overload) { > if (overload->impl) { > - nir_builder_init(&remap_state.b, overload->impl); > - nir_foreach_block(overload->impl, remap_vs_attrs, &remap_state); > + nir_builder_init(&params.b, overload->impl); > + nir_foreach_block(overload->impl, add_const_offset_to_base, &params); > + nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read); > } > } > } > -- > 2.6.3 > > _______________________________________________ > mesa-dev mailing list > <a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a> > <a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a>