[Mesa-dev] [PATCH 8/8] i965/vs: Simplify fs_visitor's ATTR file.
Jason Ekstrand
jason at jlekstrand.net
Mon Aug 17 21:53:20 PDT 2015
On Aug 17, 2015 4:08 PM, "Kenneth Graunke" <kenneth at whitecape.org> wrote:
>
> Previously, ATTR was indexed by VERT_ATTRIB_* slots; at the end of
> compilation, assign_vs_urb_setup() translated those into GRF units,
> and converted ATTR to HW_REGs.
>
> This patch moves the translation earlier, making ATTR work in terms of
> GRF units from the beginning. assign_vs_urb_setup() simply has to add
> the number of payload registers and push constants to obtain the final
> hardware GRF number. (We can't do this earlier as those values aren't
> known.)
>
> ATTR still supports reg_offset; however, it's simply added to reg.
> It's not clear whether this is valuable or not.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
> src/mesa/drivers/dri/i965/brw_fs.cpp | 26 +++------------
> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +-
> src/mesa/drivers/dri/i965/brw_nir.c | 48
+++++++++++++++++++++++++---
> 3 files changed, 50 insertions(+), 26 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index efabb52..f556ed6 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1511,10 +1511,9 @@ void
> fs_visitor::assign_vs_urb_setup()
> {
> brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
> - int grf, count, slot, channel, attr;
>
> assert(stage == MESA_SHADER_VERTEX);
> - count = _mesa_bitcount_64(vs_prog_data->inputs_read);
> + int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
> if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
> count++;
>
> @@ -1534,25 +1533,10 @@ fs_visitor::assign_vs_urb_setup()
> foreach_block_and_inst(block, fs_inst, inst, cfg) {
> for (int i = 0; i < inst->sources; i++) {
> if (inst->src[i].file == ATTR) {
> -
> - if (inst->src[i].reg == VERT_ATTRIB_MAX) {
> - slot = count - 1;
> - } else {
> - /* Attributes come in in a contiguous block, ordered by
their
> - * gl_vert_attrib value. That means we can compute the
slot
> - * number for an attribute by masking out the enabled
> - * attributes before it and counting the bits.
> - */
> - attr = inst->src[i].reg + inst->src[i].reg_offset / 4;
> - slot = _mesa_bitcount_64(vs_prog_data->inputs_read &
> - BITFIELD64_MASK(attr));
> - }
> -
> - channel = inst->src[i].reg_offset & 3;
> -
> - grf = payload.num_regs +
> - prog_data->curb_read_length +
> - slot * 4 + channel;
> + int grf = payload.num_regs +
> + prog_data->curb_read_length +
> + inst->src[i].reg +
> + inst->src[i].reg_offset;
>
> inst->src[i].file = HW_REG;
> inst->src[i].fixed_hw_reg =
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 111db8c..0ec8ef5 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -53,7 +53,7 @@ fs_reg *
> fs_visitor::emit_vs_system_value(int location)
> {
> fs_reg *reg = new(this->mem_ctx)
> - fs_reg(ATTR, VERT_ATTRIB_MAX, BRW_REGISTER_TYPE_D);
> + fs_reg(ATTR, 4*_mesa_bitcount_64(prog->InputsRead),
BRW_REGISTER_TYPE_D);
> brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
>
> switch (location) {
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
b/src/mesa/drivers/dri/i965/brw_nir.c
> index 2e2d02b..e0cf61e 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> @@ -27,8 +27,36 @@
> #include "glsl/nir/glsl_to_nir.h"
> #include "program/prog_to_nir.h"
>
> +static bool
> +remap_vs_attrs(nir_block *block, void *closure)
> +{
> + GLbitfield64 inputs_read = *((GLbitfield64 *) closure);
> +
> + nir_foreach_instr(block, instr) {
> + if (instr->type != nir_instr_type_intrinsic)
> + continue;
> +
> + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> +
> + /* We set EmitNoIndirect for VS inputs, so there are no indirects.
*/
> + assert(intrin->intrinsic != nir_intrinsic_load_input_indirect);
> +
> + if (intrin->intrinsic == nir_intrinsic_load_input) {
> + /* Attributes come in a contiguous block, ordered by their
> + * gl_vert_attrib value. That means we can compute the slot
> + * number for an attribute by masking out the enabled attributes
> + * before it and counting the bits.
> + */
> + int attr = intrin->const_index[0];
> + int slot = _mesa_bitcount_64(inputs_read &
BITFIELD64_MASK(attr));
Um... Can't we just set var->data.driver_location to this value? What's the
point of it being its own pass?
> + intrin->const_index[0] = 4 * slot;
> + }
> + }
> + return true;
> +}
> +
> static void
> -lower_scalar_vs_inputs(nir_shader *nir)
> +lower_scalar_vs_inputs(nir_shader *nir, GLbitfield64 inputs_read)
> {
> /* Start with the location of the variable's base. */
> foreach_list_typed(nir_variable, var, node, &nir->inputs) {
> @@ -40,10 +68,19 @@ lower_scalar_vs_inputs(nir_shader *nir)
> * type_size_vec4 here.
> */
> nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
> +
> + /* Finally, translate VERT_ATTRIB_* values into the actual registers.
*/
> + nir_foreach_overload(nir, overload) {
> + if (overload->impl) {
> + nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read);
> + }
> + }
> }
>
> static void
> -brw_lower_nir_io_scalar(nir_shader *nir, gl_shader_stage stage)
> +brw_lower_nir_io_scalar(nir_shader *nir,
> + const struct gl_program *prog,
> + gl_shader_stage stage)
> {
> nir_assign_var_locations_direct_first(nir, &nir->uniforms,
> &nir->num_direct_uniforms,
> @@ -53,7 +90,10 @@ brw_lower_nir_io_scalar(nir_shader *nir,
gl_shader_stage stage)
>
> switch (stage) {
> case MESA_SHADER_VERTEX:
> - lower_scalar_vs_inputs(nir);
> + /* Note that we can use prog->InputsRead rather than
key->inputs_read,
> + * since the two are identical aside from Gen4-5 edge flag differences.
> + */
> + lower_scalar_vs_inputs(nir, prog->InputsRead);
> break;
> default:
> nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
type_size_scalar);
> @@ -161,7 +201,7 @@ brw_create_nir(struct brw_context *brw,
> nir_optimize(nir, is_scalar);
>
> if (is_scalar)
> - brw_lower_nir_io_scalar(nir, stage);
> + brw_lower_nir_io_scalar(nir, prog, stage);
> else
> brw_lower_nir_io_vec4(nir, stage);
>
> --
> 2.5.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150817/17c95f3d/attachment-0001.html>
More information about the mesa-dev
mailing list