[Mesa-dev] [PATCH 02/17] i965: enable component packing for vs and fs

Timothy Arceri timothy.arceri at collabora.com
Fri Jul 8 05:01:26 UTC 2016


On Thu, 2016-07-07 at 20:12 -0700, Jason Ekstrand wrote:
> 
> On Jul 6, 2016 6:59 PM, "Timothy Arceri" <timothy.arceri at collabora.co
> m> wrote:
> >
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs.cpp     | 20 ++++++++++++--------
> >  src/mesa/drivers/dri/i965/brw_fs.h       |  5 +++--
> >  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 29
> ++++++++++++++++++++---------
> >  3 files changed, 35 insertions(+), 19 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > index 2f473cc..9e7223e 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > @@ -1109,7 +1109,8 @@ fs_visitor::emit_general_interpolation(fs_reg
> *attr, const char *name,
> >                                         const glsl_type *type,
> >                                         glsl_interp_qualifier
> interpolation_mode,
> >                                         int *location, bool
> mod_centroid,
> > -                                       bool mod_sample)
> > +                                       bool mod_sample,
> > +                                       unsigned
> num_packed_components)
> >  {
> >     assert(stage == MESA_SHADER_FRAGMENT);
> >     brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this-
> >prog_data;
> > @@ -1131,22 +1132,26 @@
> fs_visitor::emit_general_interpolation(fs_reg *attr, const char
> *name,
> >
> >        for (unsigned i = 0; i < length; i++) {
> >           emit_general_interpolation(attr, name, elem_type,
> interpolation_mode,
> > -                                    location, mod_centroid,
> mod_sample);
> > +                                    location, mod_centroid,
> mod_sample,
> > +                                    num_packed_components);
> >        }
> >     } else if (type->is_record()) {
> >        for (unsigned i = 0; i < type->length; i++) {
> >           const glsl_type *field_type = type-
> >fields.structure[i].type;
> >           emit_general_interpolation(attr, name, field_type,
> interpolation_mode,
> > -                                    location, mod_centroid,
> mod_sample);
> > +                                    location, mod_centroid,
> mod_sample,
> > +                                    num_packed_components);
> >        }
> >     } else {
> >        assert(type->is_scalar() || type->is_vector());
> > +      unsigned num_components = num_packed_components ?
> > +         num_packed_components : type->vector_elements;
> >
> >        if (prog_data->urb_setup[*location] == -1) {
> >           /* If there's no incoming setup data for this slot, don't
> >            * emit interpolation for it.
> >            */
> > -         *attr = offset(*attr, bld, type->vector_elements);
> > +         *attr = offset(*attr, bld, num_components);

> This appears to be the only interesting use of num_components. 
> Pardon my while I ask a really stupid question:  why can't we just
> make it always 4 and call it a day?

The size of *attr ultimately comes from nir_assign_var_locations() so
this might work fine for the vec4 backend but for the scalar backend we
would need to count the size differently meaning some kind of hack.

However, as I pointed out in my other reply patch 4 also makes use of
num_packed_components for calculating the location of arrays elements
so there is still that to deal with also. 


> >           (*location)++;
> >           return;
> >        }
> > @@ -1158,7 +1163,6 @@ fs_visitor::emit_general_interpolation(fs_reg
> *attr, const char *name,
> >            * handed us defined values in only the constant offset
> >            * field of the setup reg.
> >            */
> > -         unsigned vector_elements = type->vector_elements;
> >
> >           /* Data starts at suboffet 3 in 32-bit units (12 bytes),
> so it is not
> >            * 64-bit aligned and the current implementation fails to
> read the
> > @@ -1166,10 +1170,10 @@
> fs_visitor::emit_general_interpolation(fs_reg *attr, const char
> *name,
> >            * read it as vector of floats with twice the number of
> components.
> >            */
> >           if (attr->type == BRW_REGISTER_TYPE_DF) {
> > -            vector_elements *= 2;
> > +            num_components *= 2;
> >              attr->type = BRW_REGISTER_TYPE_F;
> >           }
> > -         for (unsigned int i = 0; i < vector_elements; i++) {
> > +         for (unsigned int i = 0; i < num_components; i++) {
> >              struct brw_reg interp = interp_reg(*location, i);
> >              interp = suboffset(interp, 3);
> >              interp.type = attr->type;
> > @@ -1178,7 +1182,7 @@ fs_visitor::emit_general_interpolation(fs_reg
> *attr, const char *name,
> >           }
> >        } else {
> >           /* Smooth/noperspective interpolation case. */
> > -         for (unsigned int i = 0; i < type->vector_elements; i++)
> {
> > +         for (unsigned int i = 0; i < num_components; i++) {
> >              struct brw_reg interp = interp_reg(*location, i);
> >              if (devinfo->needs_unlit_centroid_workaround &&
> mod_centroid) {
> >                 /* Get the pixel/sample mask into f0 so that we
> know
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> b/src/mesa/drivers/dri/i965/brw_fs.h
> > index 1f88f8f..0c72802 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.h
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> > @@ -181,7 +181,7 @@ public:
> >                                     const glsl_type *type,
> >                                     glsl_interp_qualifier
> interpolation_mode,
> >                                     int *location, bool
> mod_centroid,
> > -                                   bool mod_sample);
> > +                                   bool mod_sample, unsigned
> num_components);
> >     fs_reg *emit_vs_system_value(int location);
> >     void emit_interpolation_setup_gen4();
> >     void emit_interpolation_setup_gen6();
> > @@ -200,7 +200,8 @@ public:
> >     void emit_nir_code();
> >     void nir_setup_inputs();
> >     void nir_setup_single_output_varying(fs_reg *reg, const
> glsl_type *type,
> > -                                        unsigned *location);
> > +                                        unsigned *location,
> > +                                        unsigned num_components);
> >     void nir_setup_outputs();
> >     void nir_setup_uniforms();
> >     void nir_emit_system_values();
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > index 04ed42e..a08297e 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > @@ -78,7 +78,8 @@ fs_visitor::nir_setup_inputs()
> >           emit_general_interpolation(&input, var->name, var->type,
> >                                      (glsl_interp_qualifier) var-
> >data.interpolation,
> >                                      &location, var->data.centroid,
> > -                                    var->data.sample);
> > +                                    var->data.sample,
> > +                                    var-
> >data.num_packed_components);
> >        }
> >     }
> >  }
> > @@ -86,23 +87,27 @@ fs_visitor::nir_setup_inputs()
> >  void
> >  fs_visitor::nir_setup_single_output_varying(fs_reg *reg,
> >                                              const glsl_type *type,
> > -                                            unsigned *location)
> > +                                            unsigned *location,
> > +                                            unsigned
> num_packed_components)
> >  {
> >     if (type->is_array() || type->is_matrix()) {
> >        const struct glsl_type *elem_type =
> glsl_get_array_element(type);
> >        const unsigned length = glsl_get_length(type);
> >
> >        for (unsigned i = 0; i < length; i++) {
> > -         nir_setup_single_output_varying(reg, elem_type,
> location);
> > +         nir_setup_single_output_varying(reg, elem_type, location,
> > +                                         num_packed_components);
> >        }
> >     } else if (type->is_record()) {
> >        for (unsigned i = 0; i < type->length; i++) {
> >           const struct glsl_type *field_type = type-
> >fields.structure[i].type;
> > -         nir_setup_single_output_varying(reg, field_type,
> location);
> > +         nir_setup_single_output_varying(reg, field_type,
> location,
> > +                                         num_packed_components);
> >        }
> >     } else {
> >        assert(type->is_scalar() || type->is_vector());
> > -      unsigned num_elements = type->vector_elements;
> > +      unsigned num_elements = num_packed_components ?
> num_packed_components :
> > +         type->vector_elements;
> >        if (type->is_double())
> >           num_elements *= 2;
> >        for (unsigned count = 0; count < num_elements; count += 4) {
> > @@ -132,7 +137,8 @@ fs_visitor::nir_setup_outputs()
> >        case MESA_SHADER_TESS_EVAL:
> >        case MESA_SHADER_GEOMETRY: {
> >           unsigned location = var->data.location;
> > -         nir_setup_single_output_varying(&reg, var->type,
> &location);
> > +         nir_setup_single_output_varying(&reg, var->type,
> &location,
> > +                                         var-
> >data.num_packed_components);
> >           break;
> >        }
> >        case MESA_SHADER_FRAGMENT:
> > @@ -158,7 +164,9 @@ fs_visitor::nir_setup_outputs()
> >           } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
> {
> >              this->sample_mask = reg;
> >           } else {
> > -            int vector_elements = var->type->without_array()-
> >vector_elements;
> > +            int vector_elements = var->data.num_packed_components
> ?
> > +               var->data.num_packed_components :
> > +               var->type->without_array()->vector_elements;
> >
> >              /* gl_FragData or a user-defined FS output */
> >              assert(var->data.location >= FRAG_RESULT_DATA0 &&
> > @@ -3835,6 +3843,7 @@ fs_visitor::nir_emit_intrinsic(const
> fs_builder &bld, nir_intrinsic_instr *instr
> >
> >     case nir_intrinsic_load_input: {
> >        fs_reg src;
> > +      unsigned first_component = nir_intrinsic_component(instr);
> >        unsigned num_components = instr->num_components;
> >        enum brw_reg_type type = dest.type;
> >
> > @@ -3859,7 +3868,7 @@ fs_visitor::nir_emit_intrinsic(const
> fs_builder &bld, nir_intrinsic_instr *instr
> >        src = offset(src, bld, const_offset->u32[0]);
> >
> >        for (unsigned j = 0; j < num_components; j++) {
> > -         bld.MOV(offset(dest, bld, j), offset(src, bld, j));
> > +         bld.MOV(offset(dest, bld, j), offset(src, bld, j +
> first_component));
> >        }
> >
> >        if (type == BRW_REGISTER_TYPE_DF) {
> > @@ -3985,6 +3994,7 @@ fs_visitor::nir_emit_intrinsic(const
> fs_builder &bld, nir_intrinsic_instr *instr
> >        new_dest = offset(new_dest, bld, const_offset->u32[0]);
> >
> >        unsigned num_components = instr->num_components;
> > +      unsigned first_component = nir_intrinsic_component(instr);
> >        unsigned bit_size = instr->src[0].is_ssa ?
> >           instr->src[0].ssa->bit_size : instr->src[0].reg.reg-
> >bit_size;
> >        if (bit_size == 64) {
> > @@ -3998,7 +4008,8 @@ fs_visitor::nir_emit_intrinsic(const
> fs_builder &bld, nir_intrinsic_instr *instr
> >        }
> >
> >        for (unsigned j = 0; j < num_components; j++) {
> > -         bld.MOV(offset(new_dest, bld, j), offset(src, bld, j));
> > +         bld.MOV(offset(new_dest, bld, j + first_component),
> > +                 offset(src, bld, j));
> >        }
> >        break;
> >     }
> > --
> > 2.7.4
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list