[Mesa-dev] [PATCH V3 2/5] i965 Gen4/5: Generalize SF interpolation setup for GLSL1.3

Paul Berry stereotype441 at gmail.com
Tue Jul 30 13:53:25 PDT 2013


On 14 July 2013 02:39, Chris Forbes <chrisf at ijw.co.nz> wrote:

> Previously the SF only handled the builtin color varying specially.
> This patch generalizes that support to cover user-defined varyings,
> driven by the interpolation mode array set up alongside the VUE map.
>
> Based on the following patches from Olivier Galibert:
> - http://lists.freedesktop.org/archives/mesa-dev/2012-July/024335.html
> - http://lists.freedesktop.org/archives/mesa-dev/2012-July/024339.html
>
> With this patch, all the GLSL 1.3 interpolation tests that do not clip
> (spec/glsl-1.30/execution/interpolation/*-none.shader_test) pass.
>
> Signed-off-by: Chris Forbes <chrisf at ijw.co.nz>
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp    |   2 +-
>  src/mesa/drivers/dri/i965/brw_sf.c      |   9 +-
>  src/mesa/drivers/dri/i965/brw_sf.h      |   2 +-
>  src/mesa/drivers/dri/i965/brw_sf_emit.c | 187
> ++++++++++++++++----------------
>  4 files changed, 106 insertions(+), 94 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index afd29de..a81e97f 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1048,7 +1048,7 @@ fs_visitor::emit_general_interpolation(ir_variable
> *ir)
>                    inst->predicate = BRW_PREDICATE_NORMAL;
>                    inst->predicate_inverse = true;
>                 }
> -               if (brw->gen < 6) {
> +               if (brw->gen < 6 && interpolation_mode ==
> INTERP_QUALIFIER_SMOOTH) {
>                    emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
>                 }
>                attr.reg_offset++;
> diff --git a/src/mesa/drivers/dri/i965/brw_sf.c
> b/src/mesa/drivers/dri/i965/brw_sf.c
> index 7cd383c..f7725ed 100644
> --- a/src/mesa/drivers/dri/i965/brw_sf.c
> +++ b/src/mesa/drivers/dri/i965/brw_sf.c
> @@ -138,6 +138,7 @@ brw_upload_sf_prog(struct brw_context *brw)
>     struct brw_sf_prog_key key;
>     /* _NEW_BUFFERS */
>     bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
> +   int i;
>
>     memset(&key, 0, sizeof(key));
>
> @@ -190,7 +191,13 @@ brw_upload_sf_prog(struct brw_context *brw)
>        key.sprite_origin_lower_left = true;
>
>     /* _NEW_LIGHT | _NEW_PROGRAM */
> -   key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
> +   key.has_flat_shading = 0;
> +   for (i = 0; i < BRW_VARYING_SLOT_COUNT; i++) {
> +      if (brw->interpolation_mode[i] == INTERP_QUALIFIER_FLAT) {
> +         key.has_flat_shading = 1;
> +         break;
> +      }
> +   }
>

Having one part of the key that is just derived from another part of the
key is inefficient, since it means that this computation has to happen
every time the state atom ir executed.

I'd recommend moving has_flat_shading into brw_sf_compile, that way we can
compute it in compile_sf_prog(), which only has to execute when the key
changes.


>     key.do_twoside_color = ((ctx->Light.Enabled &&
> ctx->Light.Model.TwoSide) ||
>                             ctx->VertexProgram._TwoSideEnabled);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_sf.h
> b/src/mesa/drivers/dri/i965/brw_sf.h
> index 55a860d..9bfa994 100644
> --- a/src/mesa/drivers/dri/i965/brw_sf.h
> +++ b/src/mesa/drivers/dri/i965/brw_sf.h
> @@ -50,7 +50,7 @@ struct brw_sf_prog_key {
>     uint8_t point_sprite_coord_replace;
>     GLuint primitive:2;
>     GLuint do_twoside_color:1;
> -   GLuint do_flat_shading:1;
> +   GLuint has_flat_shading:1;
>     GLuint frontface_ccw:1;
>     GLuint do_point_sprite:1;
>     GLuint do_point_coord:1;
> diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c
> b/src/mesa/drivers/dri/i965/brw_sf_emit.c
> index bd68f68..63e077d 100644
> --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
> @@ -44,6 +44,15 @@
>
>
>  /**
> + * Determine the vue slot corresponding to the given half of the given
> register.
> + */
> +static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint
> reg,
> +                                       int half)
> +{
> +   return (reg + c->urb_entry_read_offset) * 2 + half;
> +}
> +
> +/**
>   * Determine the varying corresponding to the given half of the given
>   * register.  half=0 means the first half of a register, half=1 means the
>   * second half.
> @@ -51,11 +60,24 @@
>  static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint
> reg,
>                                        int half)
>  {
> -   int vue_slot = (reg + c->urb_entry_read_offset) * 2 + half;
> +   int vue_slot = vert_reg_to_vue_slot(c, reg, half);
>     return c->vue_map.slot_to_varying[vue_slot];
>  }
>
>  /**
> + * Determine the register corresponding to the given vue slot
> + */
> +static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
> +                                   struct brw_reg vert,
> +                                   int vue_slot)
> +{
> +   GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
> +   GLuint sub = vue_slot % 2;
> +
> +   return brw_vec4_grf(vert.nr + off, sub * 4);
> +}
> +
> +/**
>   * Determine the register corresponding to the given varying.
>   */
>  static struct brw_reg get_varying(struct brw_sf_compile *c,
> @@ -64,10 +86,7 @@ static struct brw_reg get_varying(struct brw_sf_compile
> *c,
>  {
>     int vue_slot = c->vue_map.varying_to_slot[varying];
>     assert (vue_slot >= c->urb_entry_read_offset);
>

Let's move this assertion into get_vue_slot() as well.


> -   GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
> -   GLuint sub = vue_slot % 2;
> -
> -   return brw_vec4_grf(vert.nr + off, sub * 4);
> +   return get_vue_slot(c, vert, vue_slot);
>  }
>
>  static bool
> @@ -79,24 +98,9 @@ have_attr(struct brw_sf_compile *c, GLuint attr)
>  /***********************************************************************
>   * Twoside lighting
>   */
> -static void copy_bfc( struct brw_sf_compile *c,
> -                     struct brw_reg vert )
> -{
> -   struct brw_compile *p = &c->func;
> -   GLuint i;
> -
> -   for (i = 0; i < 2; i++) {
> -      if (have_attr(c, VARYING_SLOT_COL0+i) &&
> -         have_attr(c, VARYING_SLOT_BFC0+i))
> -        brw_MOV(p,
> -                get_varying(c, vert, VARYING_SLOT_COL0+i),
> -                get_varying(c, vert, VARYING_SLOT_BFC0+i));
> -   }
> -}
> -
> -
>  static void do_twoside_color( struct brw_sf_compile *c )
>  {
> +   GLuint i, need_0, need_1;
>     struct brw_compile *p = &c->func;
>     GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G
> : BRW_CONDITIONAL_L;
>
> @@ -105,14 +109,16 @@ static void do_twoside_color( struct brw_sf_compile
> *c )
>     if (c->key.primitive == SF_UNFILLED_TRIS)
>        return;
>
> -   /* XXX: What happens if BFC isn't present?  This could only happen
> -    * for user-supplied vertex programs, as t_vp_build.c always does
> -    * the right thing.
> +   /* If the vertex shader provides backface color, do the selection. The
> VS
> +    * promises to set up the front color if the backface color is
> provided, but
> +    * it may contain junk if never written to.
>

Can you point me to some VS code that does this?  I thought that if the VS
wrote only to gl_BackColor, then the VUE map would only contain a slot for
gl_BackColor.  In fact, we have specific code in the Gen6-7 SF backend to
handle this case:

   /* If there was only a back color written but not front, use back
    * as the color instead of undefined
    */
   if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
      slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
   if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
      slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];

It seems to me that if we want backcolor to work right in all cases, we
need to do a similar mapping for Gen4-5.

Regardless, I with Ken that it would probably be good to pull this
backfacing color fix out to its own patch.


>      */
> -   if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c,
> VARYING_SLOT_BFC0)) &&
> -       !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c,
> VARYING_SLOT_BFC1)))
> +   need_0 = have_attr(c, VARYING_SLOT_COL0) && have_attr(c,
> VARYING_SLOT_BFC0);
> +   need_1 = have_attr(c, VARYING_SLOT_COL1) && have_attr(c,
> VARYING_SLOT_BFC1);
> +
> +   if (!need_0 && !need_1)
>        return;
> -
> +
>     /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
>      * to get all channels active inside the IF.  In the clipping code
>      * we run with NoMask, so it's not an option and we can use
> @@ -121,12 +127,15 @@ static void do_twoside_color( struct brw_sf_compile
> *c )
>     brw_push_insn_state(p);
>     brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det,
> brw_imm_f(0));
>     brw_IF(p, BRW_EXECUTE_4);
> -   {
> -      switch (c->nr_verts) {
> -      case 3: copy_bfc(c, c->vert[2]);
> -      case 2: copy_bfc(c, c->vert[1]);
> -      case 1: copy_bfc(c, c->vert[0]);
> -      }
> +   for (i=0; i < c->nr_verts; i++) {
> +      if (need_0)
> +         brw_MOV(p,
> +                 get_varying(c, c->vert[i], VARYING_SLOT_COL0),
> +                 get_varying(c, c->vert[i], VARYING_SLOT_BFC0));
> +      if (need_1)
> +         brw_MOV(p,
> +                 get_varying(c, c->vert[i], VARYING_SLOT_COL1),
> +                 get_varying(c, c->vert[i], VARYING_SLOT_BFC1));
>     }
>     brw_ENDIF(p);
>     brw_pop_insn_state(p);
> @@ -138,24 +147,34 @@ static void do_twoside_color( struct brw_sf_compile
> *c )
>   * Flat shading
>   */
>
> -#define VARYING_SLOT_COLOR_BITS (BITFIELD64_BIT(VARYING_SLOT_COL0) | \
> -                                 BITFIELD64_BIT(VARYING_SLOT_COL1))
> -
> -static void copy_colors( struct brw_sf_compile *c,
> -                    struct brw_reg dst,
> -                    struct brw_reg src)
> +static void copy_flatshaded_attributes(struct brw_sf_compile *c,
> +                                       struct brw_reg dst,
> +                                       struct brw_reg src)
>  {
>     struct brw_compile *p = &c->func;
> -   GLuint i;
> +   int i;
>
> -   for (i = VARYING_SLOT_COL0; i <= VARYING_SLOT_COL1; i++) {
> -      if (have_attr(c,i))
> -        brw_MOV(p,
> -                get_varying(c, dst, i),
> -                get_varying(c, src, i));
> +   for (i = 0; i < c->vue_map.num_slots; i++) {
> +      if (c->key.interpolation_mode[i] == INTERP_QUALIFIER_FLAT) {
> +         brw_MOV(p,
> +                 get_vue_slot(c, dst, i),
> +                 get_vue_slot(c, src, i));
> +      }
>     }
>  }
>
> +static int count_flatshaded_attributes(struct brw_sf_compile *c)
> +{
> +   int i;
> +   int count = 0;
> +
> +   for (i = 0; i < c->vue_map.num_slots; i++)
> +      if (c->key.interpolation_mode[i] == INTERP_QUALIFIER_FLAT)
> +         count++;
> +
> +   return count;
> +}
> +
>
>
>  /* Need to use a computed jump to copy flatshaded attributes as the
> @@ -167,12 +186,9 @@ static void do_flatshade_triangle( struct
> brw_sf_compile *c )
>     struct brw_compile *p = &c->func;
>     struct brw_context *brw = p->brw;
>     struct brw_reg ip = brw_ip_reg();
> -   GLuint nr = _mesa_bitcount_64(c->key.attrs & VARYING_SLOT_COLOR_BITS);
> +   GLuint nr;
>     GLuint jmpi = 1;
>
> -   if (!nr)
> -      return;
> -
>

It seems a shame to lose this optimization.  Can we move the call to
count_flatshaded_attributes() up to the declaration of nr, and then we can
keep this?


>     /* Already done in clip program:
>      */
>     if (c->key.primitive == SF_UNFILLED_TRIS)
> @@ -181,21 +197,23 @@ static void do_flatshade_triangle( struct
> brw_sf_compile *c )
>     if (brw->gen == 5)
>         jmpi = 2;
>
> +   nr = count_flatshaded_attributes(c);
> +
>     brw_push_insn_state(p);
> -
> +
>     brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
>     brw_JMPI(p, ip, ip, c->pv);
>
> -   copy_colors(c, c->vert[1], c->vert[0]);
> -   copy_colors(c, c->vert[2], c->vert[0]);
> +   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
> +   copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
>     brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
>
> -   copy_colors(c, c->vert[0], c->vert[1]);
> -   copy_colors(c, c->vert[2], c->vert[1]);
> +   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
> +   copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
>     brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
>
> -   copy_colors(c, c->vert[0], c->vert[2]);
> -   copy_colors(c, c->vert[1], c->vert[2]);
> +   copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
> +   copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
>
>     brw_pop_insn_state(p);
>  }
> @@ -206,12 +224,9 @@ static void do_flatshade_line( struct brw_sf_compile
> *c )
>     struct brw_compile *p = &c->func;
>     struct brw_context *brw = p->brw;
>     struct brw_reg ip = brw_ip_reg();
> -   GLuint nr = _mesa_bitcount_64(c->key.attrs & VARYING_SLOT_COLOR_BITS);
> +   GLuint nr;
>     GLuint jmpi = 1;
>
> -   if (!nr)
> -      return;
> -
>

Same comment as above.


>     /* Already done in clip program:
>      */
>     if (c->key.primitive == SF_UNFILLED_TRIS)
> @@ -220,14 +235,16 @@ static void do_flatshade_line( struct brw_sf_compile
> *c )
>     if (brw->gen == 5)
>         jmpi = 2;
>
> +   nr = count_flatshaded_attributes(c);
> +
>     brw_push_insn_state(p);
>
>     brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
>     brw_JMPI(p, ip, ip, c->pv);
> -   copy_colors(c, c->vert[1], c->vert[0]);
> +   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
>
>     brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
> -   copy_colors(c, c->vert[0], c->vert[1]);
> +   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
>
>     brw_pop_insn_state(p);
>  }
> @@ -324,36 +341,23 @@ static void invert_det( struct brw_sf_compile *c)
>
>  static bool
>  calculate_masks(struct brw_sf_compile *c,
> -               GLuint reg,
> -               GLushort *pc,
> -               GLushort *pc_persp,
> -               GLushort *pc_linear)
> +                GLuint reg,
> +                GLushort *pc,
> +                GLushort *pc_persp,
> +                GLushort *pc_linear)
>  {
>     bool is_last_attr = (reg == c->nr_setup_regs - 1);
> -   GLbitfield64 persp_mask;
> -   GLbitfield64 linear_mask;
> -
> -   if (c->key.do_flat_shading)
> -      persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VARYING_SLOT_POS) |
> -                                    BITFIELD64_BIT(VARYING_SLOT_COL0) |
> -                                    BITFIELD64_BIT(VARYING_SLOT_COL1));
> -   else
> -      persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VARYING_SLOT_POS));
> -
> -   if (c->key.do_flat_shading)
> -      linear_mask = c->key.attrs & ~(BITFIELD64_BIT(VARYING_SLOT_COL0) |
> -                                     BITFIELD64_BIT(VARYING_SLOT_COL1));
> -   else
> -      linear_mask = c->key.attrs;
> +   enum glsl_interp_qualifier interp;
>
>     *pc_persp = 0;
>     *pc_linear = 0;
>     *pc = 0xf;
>
> -   if (persp_mask & BITFIELD64_BIT(vert_reg_to_varying(c, reg, 0)))
> +   interp = c->key.interpolation_mode[vert_reg_to_vue_slot(c, reg, 0)];
> +   if (interp == INTERP_QUALIFIER_SMOOTH) {
> +      *pc_linear = 0xf;
>        *pc_persp = 0xf;
> -
> -   if (linear_mask & BITFIELD64_BIT(vert_reg_to_varying(c, reg, 0)))
> +   } else if (interp == INTERP_QUALIFIER_NOPERSPECTIVE)
>        *pc_linear = 0xf;
>
>     /* Maybe only processs one attribute on the final round:
> @@ -361,11 +365,12 @@ calculate_masks(struct brw_sf_compile *c,
>     if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) {
>        *pc |= 0xf0;
>
> -      if (persp_mask & BITFIELD64_BIT(vert_reg_to_varying(c, reg, 1)))
> -        *pc_persp |= 0xf0;
> -
> -      if (linear_mask & BITFIELD64_BIT(vert_reg_to_varying(c, reg, 1)))
> -        *pc_linear |= 0xf0;
> +      interp = c->key.interpolation_mode[vert_reg_to_vue_slot(c, reg, 1)];
> +      if (interp == INTERP_QUALIFIER_SMOOTH) {
> +         *pc_linear |= 0xf0;
> +         *pc_persp |= 0xf0;
> +      } else if (interp == INTERP_QUALIFIER_NOPERSPECTIVE)
> +         *pc_linear |= 0xf0;
>     }
>
>     return is_last_attr;
> @@ -418,7 +423,7 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool
> allocate)
>     if (c->key.do_twoside_color)
>        do_twoside_color(c);
>
> -   if (c->key.do_flat_shading)
> +   if (c->key.has_flat_shading)
>        do_flatshade_triangle(c);
>
>
> @@ -504,7 +509,7 @@ void brw_emit_line_setup(struct brw_sf_compile *c,
> bool allocate)
>     invert_det(c);
>     copy_z_inv_w(c);
>
> -   if (c->key.do_flat_shading)
> +   if (c->key.has_flat_shading)
>        do_flatshade_line(c);
>
>     for (i = 0; i < c->nr_setup_regs; i++)
> --
> 1.8.3.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130730/e3dfb18d/attachment-0001.html>


More information about the mesa-dev mailing list