[Mesa-dev] [PATCH 9/9] i965/vec4: Use NIR to do GS input remapping
Alejandro Piñeiro
apinheiro at igalia.com
Sat May 6 06:17:10 UTC 2017
On 05/05/17 21:24, Jason Ekstrand wrote:
> On Fri, May 5, 2017 at 1:04 AM, Alejandro Piñeiro
> <apinheiro at igalia.com <mailto:apinheiro at igalia.com>> wrote:
>
> On 05/05/17 04:11, Jason Ekstrand wrote:
> > We're already doing this in the FS back-end. This just does the
> same
> > thing in the vec4 back-end.
> > ---
> > src/intel/compiler/brw_nir.c | 3 --
> > src/intel/compiler/brw_vec4.cpp | 44 -------------------
> > src/intel/compiler/brw_vec4.h | 2 -
> > src/intel/compiler/brw_vec4_gs_nir.cpp | 10 ++---
> > src/intel/compiler/brw_vec4_gs_visitor.cpp | 70
> +++++++++++++++++++++---------
> > src/intel/compiler/brw_vec4_gs_visitor.h | 4 +-
> > src/intel/compiler/gen6_gs_visitor.cpp | 4 +-
> > 7 files changed, 56 insertions(+), 81 deletions(-)
> >
> > diff --git a/src/intel/compiler/brw_nir.c
> b/src/intel/compiler/brw_nir.c
> > index d92e8fa..8a9f89f 100644
> > --- a/src/intel/compiler/brw_nir.c
> > +++ b/src/intel/compiler/brw_nir.c
> > @@ -342,9 +342,6 @@ brw_nir_lower_vue_inputs(nir_shader *nir,
> bool is_scalar,
> > /* Inputs are stored in vec4 slots, so use type_size_vec4(). */
> > nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);
> >
> > - if (nir->stage == MESA_SHADER_GEOMETRY && !is_scalar)
> > - return;
> > -
> > /* This pass needs actual constants */
> > nir_opt_constant_folding(nir);
> >
> > diff --git a/src/intel/compiler/brw_vec4.cpp
> b/src/intel/compiler/brw_vec4.cpp
> > index b387321..ad92951 100644
> > --- a/src/intel/compiler/brw_vec4.cpp
> > +++ b/src/intel/compiler/brw_vec4.cpp
> > @@ -1693,50 +1693,6 @@ attribute_to_hw_reg(int attr,
> brw_reg_type type, bool interleaved)
> > return reg;
> > }
> >
> > -
> > -/**
> > - * Replace each register of type ATTR in this->instructions
> with a reference
> > - * to a fixed HW register.
> > - *
> > - * If interleaved is true, then each attribute takes up half a
> register, with
> > - * register N containing attribute 2*N in its first half and
> attribute 2*N+1
> > - * in its second half (this corresponds to the payload setup
> used by geometry
> > - * shaders in "single" or "dual instanced" dispatch mode). If
> interleaved is
> > - * false, then each attribute takes up a whole register, with
> register N
> > - * containing attribute N (this corresponds to the payload
> setup used by
> > - * vertex shaders, and by geometry shaders in "dual object"
> dispatch mode).
> > - */
> > -void
> > -vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map,
> > - bool interleaved)
> > -{
> > - foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
> > - for (int i = 0; i < 3; i++) {
> > - if (inst->src[i].file != ATTR)
> > - continue;
> > -
> > - int grf = attribute_map[inst->src[i].nr +
> > - inst->src[i].offset / REG_SIZE];
> > - assert(inst->src[i].offset % REG_SIZE == 0);
> > -
> > - /* All attributes used in the shader need to have been
> assigned a
> > - * hardware register by the caller
> > - */
> > - assert(grf != 0);
> > -
> > - struct brw_reg reg =
> > - attribute_to_hw_reg(grf, inst->src[i].type,
> interleaved);
> > - reg.swizzle = inst->src[i].swizzle;
> > - if (inst->src[i].abs)
> > - reg = brw_abs(reg);
> > - if (inst->src[i].negate)
> > - reg = negate(reg);
> > -
> > - inst->src[i] = reg;
> > - }
> > - }
> > -}
> > -
> > int
> > vec4_vs_visitor::setup_attributes(int payload_reg)
> > {
> > diff --git a/src/intel/compiler/brw_vec4.h
> b/src/intel/compiler/brw_vec4.h
> > index 89adfaa..0f92f34 100644
> > --- a/src/intel/compiler/brw_vec4.h
> > +++ b/src/intel/compiler/brw_vec4.h
> > @@ -367,8 +367,6 @@ public:
> >
> > protected:
> > void emit_vertex();
> > - void lower_attributes_to_hw_regs(const int *attribute_map,
> > - bool interleaved);
> > void setup_payload_interference(struct ra_graph *g, int
> first_payload_node,
> > int reg_node_count);
> > virtual void setup_payload() = 0;
> > diff --git a/src/intel/compiler/brw_vec4_gs_nir.cpp
> b/src/intel/compiler/brw_vec4_gs_nir.cpp
> > index ed8c03b..577f587 100644
> > --- a/src/intel/compiler/brw_vec4_gs_nir.cpp
> > +++ b/src/intel/compiler/brw_vec4_gs_nir.cpp
> > @@ -66,8 +66,10 @@
> vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
> > nir_const_value *vertex =
> nir_src_as_const_value(instr->src[0]);
> > nir_const_value *offset_reg =
> nir_src_as_const_value(instr->src[1]);
> >
> > + const unsigned input_array_stride =
> prog_data->urb_read_length * 2;
>
> The change of using the input_array_stride instead of
> BRW_VARYING_SLOT_COUNT is not evident (at least to me). There is a
> comment explaining the stride purpose on
> vec4_gs_visitor::setup_varying_inputs, but perhaps a note here
> would be
> welcomed. You can drop this suggestion if you prefer.
>
>
> We were using BRW_VARYING_SLOT_COUNT before because we were doing a
> generic thing here that we would later remap using
> input_array_stride. Now we're just using input_array_stride
> directly. I think if this weren't a change, you would probably not
> notice. :-) How about:
>
> All of the inputs for the first vertex come in first followed by all
> inputs for the second vertex etc. The stride between vertices is
> governed by the URB read length.
Sounds good to me.
>
> --Jason
>
>
> Other than that:
> Reviewed-by: Alejandro Piñeiro <apinheiro at igalia.com
> <mailto:apinheiro at igalia.com>>
>
This still applies (in case you missed).
>
> > +
> > if (nir_dest_bit_size(instr->dest) == 64) {
> > - src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT *
> vertex->u32[0] +
> > + src = src_reg(ATTR, input_array_stride * vertex->u32[0] +
> > instr->const_index[0] + offset_reg->u32[0],
> > glsl_type::dvec4_type);
> >
> > @@ -85,15 +87,11 @@
> vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
> > /* Make up a type...we have no way of knowing... */
> > const glsl_type *const type =
> glsl_type::ivec(instr->num_components);
> >
> > - src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT *
> vertex->u32[0] +
> > + src = src_reg(ATTR, input_array_stride * vertex->u32[0] +
> > instr->const_index[0] + offset_reg->u32[0],
> > type);
> > src.swizzle =
> BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
> >
> > - /* gl_PointSize is passed in the .w component of the
> VUE header */
> > - if (instr->const_index[0] == VARYING_SLOT_PSIZ)
> > - src.swizzle = BRW_SWIZZLE_WWWW;
> > -
> > dest = get_nir_dest(instr->dest, src.type);
> > dest.writemask =
> brw_writemask_for_size(instr->num_components);
> > emit(MOV(dest, src));
> > diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp
> b/src/intel/compiler/brw_vec4_gs_visitor.cpp
> > index 4a8b5be..516ab0b 100644
> > --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
> > +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
> > @@ -29,6 +29,7 @@
> >
> > #include "brw_vec4_gs_visitor.h"
> > #include "gen6_gs_visitor.h"
> > +#include "brw_cfg.h"
> > #include "brw_fs.h"
> > #include "brw_nir.h"
> > #include "common/gen_debug.h"
> > @@ -72,9 +73,36 @@
> vec4_gs_visitor::make_reg_for_system_value(int location)
> > return reg;
> > }
> >
> > +static inline struct brw_reg
> > +attribute_to_hw_reg(int attr, brw_reg_type type, bool interleaved)
> > +{
> > + struct brw_reg reg;
> >
> > + unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(type));
> > + if (interleaved) {
> > + reg = stride(brw_vecn_grf(width, attr / 2, (attr % 2) *
> 4), 0, width, 1);
> > + } else {
> > + reg = brw_vecn_grf(width, attr, 0);
> > + }
> > +
> > + reg.type = type;
> > + return reg;
> > +}
> > +
> > +/**
> > + * Replace each register of type ATTR in this->instructions
> with a reference
> > + * to a fixed HW register.
> > + *
> > + * If interleaved is true, then each attribute takes up half a
> register, with
> > + * register N containing attribute 2*N in its first half and
> attribute 2*N+1
> > + * in its second half (this corresponds to the payload setup
> used by geometry
> > + * shaders in "single" or "dual instanced" dispatch mode). If
> interleaved is
> > + * false, then each attribute takes up a whole register, with
> register N
> > + * containing attribute N (this corresponds to the payload
> setup used by
> > + * vertex shaders, and by geometry shaders in "dual object"
> dispatch mode).
> > + */
> > int
> > -vec4_gs_visitor::setup_varying_inputs(int payload_reg, int
> *attribute_map,
> > +vec4_gs_visitor::setup_varying_inputs(int payload_reg,
> > int attributes_per_reg)
> > {
> > /* For geometry shaders there are N copies of the input
> attributes, where N
> > @@ -89,12 +117,24 @@ vec4_gs_visitor::setup_varying_inputs(int
> payload_reg, int *attribute_map,
> > assert(num_input_vertices <= MAX_GS_INPUT_VERTICES);
> > unsigned input_array_stride = prog_data->urb_read_length * 2;
> >
> > - for (int slot = 0; slot < c->input_vue_map.num_slots; slot++) {
> > - int varying = c->input_vue_map.slot_to_varying[slot];
> > - for (unsigned vertex = 0; vertex < num_input_vertices;
> vertex++) {
> > - attribute_map[BRW_VARYING_SLOT_COUNT * vertex + varying] =
> > - attributes_per_reg * payload_reg +
> input_array_stride * vertex +
> > - slot;
> > + foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
> > + for (int i = 0; i < 3; i++) {
> > + if (inst->src[i].file != ATTR)
> > + continue;
> > +
> > + assert(inst->src[i].offset % REG_SIZE == 0);
> > + int grf = payload_reg * attributes_per_reg +
> > + inst->src[i].nr + inst->src[i].offset /
> REG_SIZE;
> > +
> > + struct brw_reg reg =
> > + attribute_to_hw_reg(grf, inst->src[i].type,
> attributes_per_reg > 1);
> > + reg.swizzle = inst->src[i].swizzle;
> > + if (inst->src[i].abs)
> > + reg = brw_abs(reg);
> > + if (inst->src[i].negate)
> > + reg = negate(reg);
> > +
> > + inst->src[i] = reg;
> > }
> > }
> >
> > @@ -103,25 +143,15 @@ vec4_gs_visitor::setup_varying_inputs(int
> payload_reg, int *attribute_map,
> > return payload_reg + regs_used;
> > }
> >
> > -
> > void
> > vec4_gs_visitor::setup_payload()
> > {
> > - int attribute_map[BRW_VARYING_SLOT_COUNT *
> MAX_GS_INPUT_VERTICES];
> > -
> > /* If we are in dual instanced or single mode, then
> attributes are going
> > * to be interleaved, so one register contains two attribute
> slots.
> > */
> > int attributes_per_reg =
> > prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT
> ? 1 : 2;
> >
> > - /* If a geometry shader tries to read from an input that
> wasn't written by
> > - * the vertex shader, that produces undefined results, but
> it shouldn't
> > - * crash anything. So initialize attribute_map to
> zeros--that ensures that
> > - * these undefined results are read from r0.
> > - */
> > - memset(attribute_map, 0, sizeof(attribute_map));
> > -
> > int reg = 0;
> >
> > /* The payload always contains important data in r0, which
> contains
> > @@ -132,13 +162,11 @@ vec4_gs_visitor::setup_payload()
> >
> > /* If the shader uses gl_PrimitiveIDIn, that goes in r1. */
> > if (gs_prog_data->include_primitive_id)
> > - attribute_map[VARYING_SLOT_PRIMITIVE_ID] =
> attributes_per_reg * reg++;
> > + reg++;
> >
> > reg = setup_uniforms(reg);
> >
> > - reg = setup_varying_inputs(reg, attribute_map,
> attributes_per_reg);
> > -
> > - lower_attributes_to_hw_regs(attribute_map,
> attributes_per_reg > 1);
> > + reg = setup_varying_inputs(reg, attributes_per_reg);
> >
> > this->first_non_payload_grf = reg;
> > }
> > diff --git a/src/intel/compiler/brw_vec4_gs_visitor.h
> b/src/intel/compiler/brw_vec4_gs_visitor.h
> > index 09221f9..6b21a33 100644
> > --- a/src/intel/compiler/brw_vec4_gs_visitor.h
> > +++ b/src/intel/compiler/brw_vec4_gs_visitor.h
> > @@ -33,6 +33,7 @@
> > #include "brw_vec4.h"
> >
> > #define MAX_GS_INPUT_VERTICES 6
> > +#define MAX_GS_INPUT_VARYINGS 32
> >
> > #ifdef __cplusplus
> > namespace brw {
> > @@ -64,8 +65,7 @@ protected:
> > virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
> >
> > protected:
> > - int setup_varying_inputs(int payload_reg, int *attribute_map,
> > - int attributes_per_reg);
> > + int setup_varying_inputs(int payload_reg, int
> attributes_per_reg);
> > void emit_control_data_bits();
> > void set_stream_control_data_bits(unsigned stream_id);
> >
> > diff --git a/src/intel/compiler/gen6_gs_visitor.cpp
> b/src/intel/compiler/gen6_gs_visitor.cpp
> > index 075bc4a..afc6056 100644
> > --- a/src/intel/compiler/gen6_gs_visitor.cpp
> > +++ b/src/intel/compiler/gen6_gs_visitor.cpp
> > @@ -516,9 +516,7 @@ gen6_gs_visitor::setup_payload()
> >
> > reg = setup_uniforms(reg);
> >
> > - reg = setup_varying_inputs(reg, attribute_map,
> attributes_per_reg);
> > -
> > - lower_attributes_to_hw_regs(attribute_map, true);
> > + reg = setup_varying_inputs(reg, attributes_per_reg);
> >
> > this->first_non_payload_grf = reg;
> > }
>
>
>
More information about the mesa-dev
mailing list