[Mesa-dev] [PATCH 02/15] i965: enable component packing for vs and fs

Timothy Arceri timothy.arceri at collabora.com
Tue Jul 19 11:38:21 UTC 2016


On Tue, 2016-07-19 at 13:03 +0200, Alejandro Piñeiro wrote:
> Is this the correct version of the patch? It uses nir_lower_io with 4
> parameters, while nir_lower_io on master uses 3 (and afaik, it has been
> using 3 for a while).
> 
> FWIW, this patch doesn't apply cleanly with current master mesa.

Hi,

These patches apply on top of this series:
https://patchwork.freedesktop.org/series/10000/

It's been reviewed by Chris so hopefully Ken will push it soon.


> 
> BR
> 
> On 19/07/16 08:33, Timothy Arceri wrote:
> > Rather than trying to work out the total number of components
> > used at a location we simply treat all outputs as vec4s.
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs.h           |  1 -
> >  src/mesa/drivers/dri/i965/brw_fs_nir.cpp     | 22 ++++++++++----
> > --------
> >  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 ++--------
> >  src/mesa/drivers/dri/i965/brw_nir.c          |  8 ++++----
> >  4 files changed, 16 insertions(+), 25 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> > b/src/mesa/drivers/dri/i965/brw_fs.h
> > index 574475f..fc1e1c4 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.h
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> > @@ -317,7 +317,6 @@ public:
> >     fs_reg frag_stencil;
> >     fs_reg sample_mask;
> >     fs_reg outputs[VARYING_SLOT_MAX];
> > -   unsigned output_components[VARYING_SLOT_MAX];
> >     fs_reg dual_src_output;
> >     bool do_dual_src;
> >     int first_non_payload_grf;
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > index 610c151..395594f 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > @@ -67,13 +67,12 @@
> > fs_visitor::nir_setup_single_output_varying(fs_reg *reg,
> >        }
> >     } else {
> >        assert(type->is_scalar() || type->is_vector());
> > -      unsigned num_elements = type->vector_elements;
> > +      unsigned num_iter = 1;
> >        if (type->is_double())
> > -         num_elements *= 2;
> > -      for (unsigned count = 0; count < num_elements; count += 4) {
> > +         num_iter = 2;
> > +      for (unsigned count = 0; count < num_iter; count++) {
> >           this->outputs[*location] = *reg;
> > -         this->output_components[*location] = MIN2(4, num_elements
> > - count);
> > -         *reg = offset(*reg, bld, this-
> > >output_components[*location]);
> > +         *reg = offset(*reg, bld, 4);
> >           (*location)++;
> >        }
> >     }
> > @@ -114,7 +113,6 @@ fs_visitor::nir_setup_outputs()
> >              /* Writing gl_FragColor outputs to all color regions.
> > */
> >              for (unsigned int i = 0; i < MAX2(key-
> > >nr_color_regions, 1); i++) {
> >                 this->outputs[i] = reg;
> > -               this->output_components[i] = 4;
> >              }
> >           } else if (var->data.location == FRAG_RESULT_DEPTH) {
> >              this->frag_depth = reg;
> > @@ -123,8 +121,6 @@ fs_visitor::nir_setup_outputs()
> >           } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
> > {
> >              this->sample_mask = reg;
> >           } else {
> > -            int vector_elements = var->type->without_array()-
> > >vector_elements;
> > -
> >              /* gl_FragData or a user-defined FS output */
> >              assert(var->data.location >= FRAG_RESULT_DATA0 &&
> >                     var->data.location <
> > FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS);
> > @@ -132,8 +128,7 @@ fs_visitor::nir_setup_outputs()
> >              /* General color output. */
> >              for (unsigned int i = 0; i < MAX2(1, var->type-
> > >length); i++) {
> >                 int output = var->data.location - FRAG_RESULT_DATA0
> > + i;
> > -               this->outputs[output] = offset(reg, bld,
> > vector_elements * i);
> > -               this->output_components[output] = vector_elements;
> > +               this->outputs[output] = offset(reg, bld, 4 * i);
> >              }
> >           }
> >           break;
> > @@ -3892,6 +3887,7 @@ fs_visitor::nir_emit_intrinsic(const
> > fs_builder &bld, nir_intrinsic_instr *instr
> >  
> >     case nir_intrinsic_load_input: {
> >        fs_reg src = fs_reg(ATTR, instr->const_index[0], dest.type);
> > +      unsigned first_component = nir_intrinsic_component(instr);
> >        unsigned num_components = instr->num_components;
> >        enum brw_reg_type type = dest.type;
> >  
> > @@ -3900,7 +3896,7 @@ fs_visitor::nir_emit_intrinsic(const
> > fs_builder &bld, nir_intrinsic_instr *instr
> >        src = offset(src, bld, const_offset->u32[0]);
> >  
> >        for (unsigned j = 0; j < num_components; j++) {
> > -         bld.MOV(offset(dest, bld, j), offset(src, bld, j));
> > +         bld.MOV(offset(dest, bld, j), offset(src, bld, j +
> > first_component));
> >        }
> >  
> >        if (type == BRW_REGISTER_TYPE_DF) {
> > @@ -4026,6 +4022,7 @@ fs_visitor::nir_emit_intrinsic(const
> > fs_builder &bld, nir_intrinsic_instr *instr
> >        new_dest = offset(new_dest, bld, const_offset->u32[0]);
> >  
> >        unsigned num_components = instr->num_components;
> > +      unsigned first_component = nir_intrinsic_component(instr);
> >        unsigned bit_size = instr->src[0].is_ssa ?
> >           instr->src[0].ssa->bit_size : instr->src[0].reg.reg-
> > >bit_size;
> >        if (bit_size == 64) {
> > @@ -4039,7 +4036,8 @@ fs_visitor::nir_emit_intrinsic(const
> > fs_builder &bld, nir_intrinsic_instr *instr
> >        }
> >  
> >        for (unsigned j = 0; j < num_components; j++) {
> > -         bld.MOV(offset(new_dest, bld, j), offset(src, bld, j));
> > +         bld.MOV(offset(new_dest, bld, j + first_component),
> > +                 offset(src, bld, j));
> >        }
> >        break;
> >     }
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> > index 156a630..6d84374 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> > @@ -459,8 +459,7 @@ fs_visitor::emit_fb_writes()
> >              src0_alpha = offset(outputs[0], bld, 3);
> >  
> >           inst = emit_single_fb_write(abld, this->outputs[target],
> > reg_undef,
> > -                                     src0_alpha,
> > -                                     this-
> > >output_components[target]);
> > +                                     src0_alpha, 4);
> >           inst->target = target;
> >        }
> >     }
> > @@ -545,9 +544,7 @@ void
> > fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
> >     const fs_builder abld = bld.annotate("user clip distances");
> >  
> >     this->outputs[VARYING_SLOT_CLIP_DIST0] =
> > vgrf(glsl_type::vec4_type);
> > -   this->output_components[VARYING_SLOT_CLIP_DIST0] = 4;
> >     this->outputs[VARYING_SLOT_CLIP_DIST1] =
> > vgrf(glsl_type::vec4_type);
> > -   this->output_components[VARYING_SLOT_CLIP_DIST1] = 4;
> >  
> >     for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
> >        fs_reg u = userplane[i];
> > @@ -724,10 +721,8 @@ fs_visitor::emit_urb_writes(const fs_reg
> > &gs_vertex_count)
> >                 sources[length++] = reg;
> >              }
> >           } else {
> > -            for (unsigned i = 0; i < output_components[varying];
> > i++)
> > +            for (unsigned i = 0; i < 4; i++)
> >                 sources[length++] = offset(this->outputs[varying],
> > bld, i);
> > -            for (unsigned i = output_components[varying]; i < 4;
> > i++)
> > -               sources[length++] = brw_imm_d(0);
> >           }
> >           break;
> >        }
> > @@ -901,7 +896,6 @@ fs_visitor::init()
> >     this->nir_ssa_values = NULL;
> >  
> >     memset(&this->payload, 0, sizeof(this->payload));
> > -   memset(this->output_components, 0, sizeof(this-
> > >output_components));
> >     this->source_depth_to_render_target = false;
> >     this->runtime_check_aads_emit = false;
> >     this->first_non_payload_grf = 0;
> > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
> > b/src/mesa/drivers/dri/i965/brw_nir.c
> > index d1a823a..8f68c39 100644
> > --- a/src/mesa/drivers/dri/i965/brw_nir.c
> > +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> > @@ -302,8 +302,8 @@ brw_nir_lower_vue_outputs(nir_shader *nir,
> >     if (is_scalar) {
> >        nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
> >                                 VARYING_SLOT_VAR0,
> > -                               type_size_scalar);
> > -      nir_lower_io(nir, nir_var_shader_out, type_size_scalar,
> > false);
> > +                               type_size_vec4_times_4);
> > +      nir_lower_io(nir, nir_var_shader_out,
> > type_size_vec4_times_4, false);
> >     } else {
> >        nir_foreach_variable(var, &nir->outputs)
> >           var->data.driver_location = var->data.location;
> > @@ -340,8 +340,8 @@ void
> >  brw_nir_lower_fs_outputs(nir_shader *nir)
> >  {
> >     nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
> > -                            FRAG_RESULT_DATA0, type_size_scalar);
> > -   nir_lower_io(nir, nir_var_shader_out, type_size_scalar, false);
> > +                            FRAG_RESULT_DATA0,
> > type_size_vec4_times_4);
> > +   nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4,
> > false);
> >  }
> >  
> >  void
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list