[Mesa-dev] [PATCH] i965: Move Gen4-5 interpolation stuff to brw_wm_prog_data.

Timothy Arceri timothy.arceri at collabora.com
Sat Jan 14 00:15:49 UTC 2017


On Fri, 2017-01-13 at 16:07 -0800, Kenneth Graunke wrote:
> This fixes glxgears rendering, which had surprisingly been broken
> since
> late October!  Specifically, commit
> 91d61fbf7cb61a44adcaae51ee08ad0dd6b.
> 
> glxgears uses glShadeModel(GL_FLAT) when drawing the main portion of
> the
> gears, then uses glShadeModel(GL_SMOOTH) for drawing the Gouraud-
> shaded
> inner portion of the gears.  This results in the same fragment
> program
> having two different state-dependent interpolation maps: one where
> gl_Color is flat, and another where it's smooth.
> 
> The problem is that there's only one gen4_fragment_program, so it
> can't
> store both.  Each FS compile would trash the last one.  But, the FS
> compiles are cached, so the first one would store FLAT, and the
> second
> would see a matching program in the cache and never bother to compile
> one with SMOOTH.  (Clearing the program cache on every draw made it
> render correctly.)

I believe the fs key should have caused the one with SMOOTH to be
compiled, but since variants share gl_program it would get set to
whatever the last variant to be compiled had set and would never change
again as they would be grabbed from cache in future.

Thanks so much for fixing this. It would have been tricky without the
hardware :)

Reviewed-by: Timothy Arceri <timothy.arceri at collabora.com>


> 
> Instead, move it to brw_wm_prog_data, where we can keep a copy for
> every specialization of the program.  The only downside is bloating
> the structure a bit, but we can tighten that up a bit if we need to.
> This also lets us kill gen4_fragment_program entirely!
> 
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> Cc: Timothy Arceri <timothy.arceri at collabora.com>
> ---
>  src/mesa/drivers/dri/i965/brw_clip.c              | 19 +++++++----
> ---
>  src/mesa/drivers/dri/i965/brw_clip.h              |  2 +-
>  src/mesa/drivers/dri/i965/brw_compiler.h          | 10 +++++++-
>  src/mesa/drivers/dri/i965/brw_context.h           | 14 -----------
>  src/mesa/drivers/dri/i965/brw_fs.cpp              |  9 ++++++-
>  src/mesa/drivers/dri/i965/brw_interpolation_map.c | 30 +++++++++++
> ------------
>  src/mesa/drivers/dri/i965/brw_nir.c               |  8 +-----
>  src/mesa/drivers/dri/i965/brw_nir.h               |  3 +--
>  src/mesa/drivers/dri/i965/brw_program.c           |  9 +------
>  src/mesa/drivers/dri/i965/brw_sf.c                | 16 ++++++------
>  src/mesa/drivers/dri/i965/brw_sf.h                |  2 +-
>  11 files changed, 52 insertions(+), 70 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_clip.c
> b/src/mesa/drivers/dri/i965/brw_clip.c
> index 8560dd4..e375674 100644
> --- a/src/mesa/drivers/dri/i965/brw_clip.c
> +++ b/src/mesa/drivers/dri/i965/brw_clip.c
> @@ -139,7 +139,7 @@ brw_upload_clip_prog(struct brw_context *brw)
>                          _NEW_POLYGON |
>                          _NEW_TRANSFORM,
>                          BRW_NEW_BLORP |
> -                        BRW_NEW_FRAGMENT_PROGRAM |
> +                        BRW_NEW_FS_PROG_DATA |
>                          BRW_NEW_REDUCED_PRIMITIVE |
>                          BRW_NEW_VUE_MAP_GEOM_OUT))
>        return;
> @@ -149,15 +149,14 @@ brw_upload_clip_prog(struct brw_context *brw)
>     /* Populate the key:
>      */
>  
> -   const struct gl_program *fprog = brw->fragment_program;
> -   if (fprog) {
> -      assert(brw->gen < 6);
> -      struct gen4_fragment_program *p = (struct
> gen4_fragment_program *) fprog;
> -
> -      /* BRW_NEW_FRAGMENT_PROGRAM */
> -      key.contains_flat_varying = p->contains_flat_varying;
> -      key.contains_noperspective_varying = p-
> >contains_noperspective_varying;
> -      key.interp_mode = p->interp_mode;
> +   /* BRW_NEW_FS_PROG_DATA */
> +   const struct brw_wm_prog_data *wm_prog_data =
> +      brw_wm_prog_data(brw->wm.base.prog_data);
> +   if (wm_prog_data) {
> +      key.contains_flat_varying = wm_prog_data-
> >contains_flat_varying;
> +      key.contains_noperspective_varying =
> +         wm_prog_data->contains_noperspective_varying;
> +      key.interp_mode = wm_prog_data->interp_mode;
>     }
>  
>     /* BRW_NEW_REDUCED_PRIMITIVE */
> diff --git a/src/mesa/drivers/dri/i965/brw_clip.h
> b/src/mesa/drivers/dri/i965/brw_clip.h
> index 355ae64..a8ee394 100644
> --- a/src/mesa/drivers/dri/i965/brw_clip.h
> +++ b/src/mesa/drivers/dri/i965/brw_clip.h
> @@ -49,7 +49,7 @@ struct brw_clip_prog_key {
>     GLbitfield64 attrs;
>     bool contains_flat_varying;
>     bool contains_noperspective_varying;
> -   unsigned char *interp_mode;
> +   const unsigned char *interp_mode;
>     GLuint primitive:4;
>     GLuint nr_userclip:4;
>     GLuint pv_first:1;
> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h
> b/src/mesa/drivers/dri/i965/brw_compiler.h
> index c378e93..3b3b7e0 100644
> --- a/src/mesa/drivers/dri/i965/brw_compiler.h
> +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
> @@ -412,6 +412,9 @@ struct brw_wm_prog_data {
>     bool has_side_effects;
>     bool pulls_bary;
>  
> +   bool contains_flat_varying;
> +   bool contains_noperspective_varying;
> +
>     /**
>      * Mask of which interpolation modes are required by the fragment
> shader.
>      * Used in hardware setup on gen6+.
> @@ -424,6 +427,11 @@ struct brw_wm_prog_data {
>      */
>     uint32_t flat_inputs;
>  
> +   /* Mapping of VUE slots to interpolation modes.
> +    * Used by the Gen4-5 clip/sf/wm stages.
> +    */
> +   unsigned char interp_mode[65]; /* BRW_VARYING_SLOT_COUNT */
> +
>     /**
>      * Map from gl_varying_slot to the position within the FS setup
> data
>      * payload where the varying's attribute vertex deltas should be
> delivered.
> @@ -580,7 +588,7 @@ void brw_compute_tess_vue_map(struct brw_vue_map
> *const vue_map,
>  /* brw_interpolation_map.c */
>  void brw_setup_vue_interpolation(struct brw_vue_map *vue_map,
>                                   struct nir_shader *nir,
> -                                 struct gl_program *prog,
> +                                 struct brw_wm_prog_data *prog_data,
>                                   const struct gen_device_info
> *devinfo);
>  
>  enum shader_dispatch_mode {
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h
> b/src/mesa/drivers/dri/i965/brw_context.h
> index ff3f861..d5e4251 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -338,20 +338,6 @@ struct brw_program {
>  };
>  
>  
> -struct gen4_fragment_program {
> -   struct brw_program base;
> -
> -   bool contains_flat_varying;
> -   bool contains_noperspective_varying;
> -
> -   /*
> -    * Mapping of varying slots to interpolation modes.
> -    * Used Gen4/5 by the clip|sf|wm stages.
> -    */
> -   unsigned char interp_mode[BRW_VARYING_SLOT_COUNT];
> -};
> -
> -
>  /**
>   * Bitmask indicating which fragment shader inputs represent
> varyings (and
>   * hence have to be delivered to the fragment shader by the SF/SBE
> stage).
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index b1f9f63..13949c9 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -6382,10 +6382,17 @@ brw_compile_fs(const struct brw_compiler
> *compiler, void *log_data,
>                 unsigned *final_assembly_size,
>                 char **error_str)
>  {
> +   const struct gen_device_info *devinfo = compiler->devinfo;
> +
>     nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
>     shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex,
> true);
> -   brw_nir_lower_fs_inputs(shader, vue_map, prog, compiler->devinfo, 
> key);
> +   brw_nir_lower_fs_inputs(shader, devinfo, key);
>     brw_nir_lower_fs_outputs(shader);
> +
> +   if (devinfo->gen < 6) {
> +      brw_setup_vue_interpolation(vue_map, shader, prog_data,
> devinfo);
> +   }
> +
>     if (!key->multisample_fbo)
>        NIR_PASS_V(shader, demote_sample_qualifiers);
>     NIR_PASS_V(shader, move_interpolation_to_top);
> diff --git a/src/mesa/drivers/dri/i965/brw_interpolation_map.c
> b/src/mesa/drivers/dri/i965/brw_interpolation_map.c
> index 8533c95..8d53e52 100644
> --- a/src/mesa/drivers/dri/i965/brw_interpolation_map.c
> +++ b/src/mesa/drivers/dri/i965/brw_interpolation_map.c
> @@ -37,20 +37,20 @@ static char const *get_qual_name(int mode)
>  }
>  
>  static void
> -gen4_frag_prog_set_interp_modes(struct gen4_fragment_program *prog,
> +gen4_frag_prog_set_interp_modes(struct brw_wm_prog_data *prog_data,
>                                  struct brw_vue_map *vue_map,
>                                  unsigned location, unsigned
> slot_count,
>                                  enum glsl_interp_mode interp)
>  {
>     for (unsigned k = 0; k < slot_count; k++) {
>        unsigned slot = vue_map->varying_to_slot[location + k];
> -      if (slot != -1 && prog->interp_mode[slot] == INTERP_MODE_NONE)
> {
> -         prog->interp_mode[slot] = interp;
> +      if (slot != -1 && prog_data->interp_mode[slot] ==
> INTERP_MODE_NONE) {
> +         prog_data->interp_mode[slot] = interp;
>  
> -         if (prog->interp_mode[slot] == INTERP_MODE_FLAT) {
> -            prog->contains_flat_varying = true;
> -         } else if (prog->interp_mode[slot] ==
> INTERP_MODE_NOPERSPECTIVE) {
> -            prog->contains_noperspective_varying = true;
> +         if (prog_data->interp_mode[slot] == INTERP_MODE_FLAT) {
> +            prog_data->contains_flat_varying = true;
> +         } else if (prog_data->interp_mode[slot] ==
> INTERP_MODE_NOPERSPECTIVE) {
> +            prog_data->contains_noperspective_varying = true;
>           }
>        }
>     }
> @@ -59,13 +59,11 @@ gen4_frag_prog_set_interp_modes(struct
> gen4_fragment_program *prog,
>  /* Set up interpolation modes for every element in the VUE */
>  void
>  brw_setup_vue_interpolation(struct brw_vue_map *vue_map, nir_shader
> *nir,
> -                            struct gl_program *prog,
> +                            struct brw_wm_prog_data *prog_data,
>                              const struct gen_device_info *devinfo)
>  {
> -   struct gen4_fragment_program *fprog = (struct
> gen4_fragment_program *) prog;
> -
>     /* Initialise interp_mode. INTERP_MODE_NONE == 0 */
> -   memset(fprog->interp_mode, 0, sizeof(fprog->interp_mode));
> +   memset(prog_data->interp_mode, 0, sizeof(prog_data-
> >interp_mode));
>  
>     if (!vue_map)
>        return;
> @@ -75,20 +73,20 @@ brw_setup_vue_interpolation(struct brw_vue_map
> *vue_map, nir_shader *nir,
>      */
>     unsigned pos_slot = vue_map->varying_to_slot[VARYING_SLOT_POS];
>     if (pos_slot != -1) {;
> -      fprog->interp_mode[pos_slot] = INTERP_MODE_NOPERSPECTIVE;
> -      fprog->contains_noperspective_varying = true;
> +      prog_data->interp_mode[pos_slot] = INTERP_MODE_NOPERSPECTIVE;
> +      prog_data->contains_noperspective_varying = true;
>     }
>  
>     foreach_list_typed(nir_variable, var, node, &nir->inputs) {
>        unsigned location = var->data.location;
>        unsigned slot_count = glsl_count_attribute_slots(var->type,
> false);
>  
> -      gen4_frag_prog_set_interp_modes(fprog, vue_map, location,
> slot_count,
> +      gen4_frag_prog_set_interp_modes(prog_data, vue_map, location,
> slot_count,
>                                        var->data.interpolation);
>  
>        if (location == VARYING_SLOT_COL0 || location ==
> VARYING_SLOT_COL1) {
>           location = location + VARYING_SLOT_BFC0 -
> VARYING_SLOT_COL0;
> -         gen4_frag_prog_set_interp_modes(fprog, vue_map, location,
> +         gen4_frag_prog_set_interp_modes(prog_data, vue_map,
> location,
>                                           slot_count, var-
> >data.interpolation);
>        }
>     }
> @@ -105,7 +103,7 @@ brw_setup_vue_interpolation(struct brw_vue_map
> *vue_map, nir_shader *nir,
>  
>           fprintf(stderr, "%d: %d %s ofs %d\n",
>                   i, varying,
> -                 get_qual_name(fprog->interp_mode[i]),
> +                 get_qual_name(prog_data->interp_mode[i]),
>                   brw_vue_slot_to_offset(i));
>        }
>     }
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
> b/src/mesa/drivers/dri/i965/brw_nir.c
> index 3c1bc51..a5912a0 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> @@ -343,8 +343,7 @@ brw_nir_lower_tes_inputs(nir_shader *nir, const
> struct brw_vue_map *vue_map)
>  }
>  
>  void
> -brw_nir_lower_fs_inputs(nir_shader *nir, struct brw_vue_map
> *vue_map,
> -                        struct gl_program *prog,
> +brw_nir_lower_fs_inputs(nir_shader *nir,
>                          const struct gen_device_info *devinfo,
>                          const struct brw_wm_prog_key *key)
>  {
> @@ -375,11 +374,6 @@ brw_nir_lower_fs_inputs(nir_shader *nir, struct
> brw_vue_map *vue_map,
>        }
>     }
>  
> -   if (devinfo->gen < 6) {
> -      assert(prog); /* prog will be NULL when called from Vulkan */
> -      brw_setup_vue_interpolation(vue_map, nir, prog, devinfo);
> -   }
> -
>     nir_lower_io_options lower_io_options = 0;
>     if (key->persample_interp)
>        lower_io_options |= nir_lower_io_force_sample_interpolation;
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.h
> b/src/mesa/drivers/dri/i965/brw_nir.h
> index ecb4118..7e2f279 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.h
> +++ b/src/mesa/drivers/dri/i965/brw_nir.h
> @@ -103,8 +103,7 @@ void brw_nir_lower_vs_inputs(nir_shader *nir,
>  void brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar,
>                                const struct brw_vue_map *vue_map);
>  void brw_nir_lower_tes_inputs(nir_shader *nir, const struct
> brw_vue_map *vue);
> -void brw_nir_lower_fs_inputs(nir_shader *nir, struct brw_vue_map
> *vue_map,
> -                             struct gl_program *prog,
> +void brw_nir_lower_fs_inputs(nir_shader *nir,
>                               const struct gen_device_info *devinfo,
>                               const struct brw_wm_prog_key *key);
>  void brw_nir_lower_vue_outputs(nir_shader *nir, bool is_scalar);
> diff --git a/src/mesa/drivers/dri/i965/brw_program.c
> b/src/mesa/drivers/dri/i965/brw_program.c
> index c8fb3fa..e81f6b1 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.c
> +++ b/src/mesa/drivers/dri/i965/brw_program.c
> @@ -158,14 +158,7 @@ static struct gl_program *brwNewProgram(struct
> gl_context *ctx, GLenum target,
>     }
>  
>     case GL_FRAGMENT_PROGRAM_ARB: {
> -      struct brw_program *prog;
> -      if (brw->gen < 6) {
> -         struct gen4_fragment_program *g4_prog =
> -            rzalloc(NULL, struct gen4_fragment_program);
> -         prog = &g4_prog->base;
> -      } else {
> -         prog = rzalloc(NULL, struct brw_program);
> -      }
> +      struct brw_program *prog = rzalloc(NULL, struct brw_program);
>  
>        if (prog) {
>  	 prog->id = get_new_program_id(brw->screen);
> diff --git a/src/mesa/drivers/dri/i965/brw_sf.c
> b/src/mesa/drivers/dri/i965/brw_sf.c
> index 76faccd..468050a 100644
> --- a/src/mesa/drivers/dri/i965/brw_sf.c
> +++ b/src/mesa/drivers/dri/i965/brw_sf.c
> @@ -147,7 +147,7 @@ brw_upload_sf_prog(struct brw_context *brw)
>                          _NEW_PROGRAM |
>                          _NEW_TRANSFORM,
>                          BRW_NEW_BLORP |
> -                        BRW_NEW_FRAGMENT_PROGRAM |
> +                        BRW_NEW_FS_PROG_DATA |
>                          BRW_NEW_REDUCED_PRIMITIVE |
>                          BRW_NEW_VUE_MAP_GEOM_OUT))
>        return;
> @@ -203,14 +203,12 @@ brw_upload_sf_prog(struct brw_context *brw)
>     if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo)
>        key.sprite_origin_lower_left = true;
>  
> -   const struct gl_program *fprog = brw->fragment_program;
> -   if (fprog) {
> -      assert(brw->gen < 6);
> -      struct gen4_fragment_program *p = (struct
> gen4_fragment_program *) fprog;
> -
> -      /* BRW_NEW_FRAGMENT_PROGRAM */
> -      key.contains_flat_varying = p->contains_flat_varying;
> -      key.interp_mode = p->interp_mode;
> +   /* BRW_NEW_FS_PROG_DATA */
> +   const struct brw_wm_prog_data *wm_prog_data =
> +      brw_wm_prog_data(brw->wm.base.prog_data);
> +   if (wm_prog_data) {
> +      key.contains_flat_varying = wm_prog_data-
> >contains_flat_varying;
> +      key.interp_mode = wm_prog_data->interp_mode;
>     }
>  
>     /* _NEW_LIGHT | _NEW_PROGRAM */
> diff --git a/src/mesa/drivers/dri/i965/brw_sf.h
> b/src/mesa/drivers/dri/i965/brw_sf.h
> index ce4b2e3..c6840dd 100644
> --- a/src/mesa/drivers/dri/i965/brw_sf.h
> +++ b/src/mesa/drivers/dri/i965/brw_sf.h
> @@ -47,7 +47,7 @@
>  struct brw_sf_prog_key {
>     GLbitfield64 attrs;
>     bool contains_flat_varying;
> -   unsigned char *interp_mode;
> +   const unsigned char *interp_mode;
>     uint8_t point_sprite_coord_replace;
>     GLuint primitive:2;
>     GLuint do_twoside_color:1;


More information about the mesa-dev mailing list