[Mesa-dev] [PATCH v2] i965: Perform program state upload outside of atom handling

Kenneth Graunke kenneth at whitecape.org
Thu Feb 19 14:24:16 PST 2015


On Thursday, February 12, 2015 05:38:09 PM Carl Worth wrote:
> Across the board of the various generations, the intial few atoms in
> all of the atom lists are basically the same, (performing uploads for
> the various programs). The only difference is that prior to gen6
> there's an ff_gs upload in place of the later gs upload.
> 
> In this commit, instead of using the atom lists for this program state
> upload, we add a new function brw_upload_programs that performs the
> same state checks and calls the various upload functions as necessary.
> 
> This commit is intended to have no functional change. The motivation
> is that future code, (such as the shader cache), wants to have a
> single function within which to perform various operations before and
> after program upload, (with some local variables holding state across
> the upload).
> ---
> 
> In this revision of the patch, I've changed things so that the lists
> of flags remain in the same files they were in before, (near the code
> performing the operations that the flags control). So I think this
> aspect is a bit cleaner.
> 
> I'm still not a huge fan of the resulting conditional expressions,
> (and the number of parentheses involved in them). It would not be too
> hard to imagine one of those getting edited incorrectly.
> 
> So feedback would be most welcome on improving those.
> 
> Maybe:
> 
>     int dirty = 0;
> 
>     dirty |= (brw->state.dirty.mesa & (FLAG | FLAG2 | FLAG3);
>     dirty |= (brw->state.dirty.brw & (BRW_FLAG | BRW_FLAG2);
> 
>     if (dirty == 0)
>        return;
> 
> ?
> 
>  src/mesa/drivers/dri/i965/brw_ff_gs.c        | 21 +++++++------
>  src/mesa/drivers/dri/i965/brw_ff_gs.h        |  3 ++
>  src/mesa/drivers/dri/i965/brw_gs.c           | 23 ++++++--------
>  src/mesa/drivers/dri/i965/brw_gs.h           |  5 ++++
>  src/mesa/drivers/dri/i965/brw_state_upload.c | 35 ++++++++++++----------
>  src/mesa/drivers/dri/i965/brw_vs.c           | 32 +++++++++-----------
>  src/mesa/drivers/dri/i965/brw_vs.h           |  3 ++
>  src/mesa/drivers/dri/i965/brw_wm.c           | 45 +++++++++++++---------------
>  src/mesa/drivers/dri/i965/brw_wm.h           |  3 ++
>  9 files changed, 86 insertions(+), 84 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.c b/src/mesa/drivers/dri/i965/brw_ff_gs.c
> index 653c4b6..fd68aa3 100644
> --- a/src/mesa/drivers/dri/i965/brw_ff_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_ff_gs.c
> @@ -221,10 +221,19 @@ static void populate_key(struct brw_context *brw,
>  
>  /* Calculate interpolants for triangle and line rasterization.
>   */
> -static void
> +void
>  brw_upload_ff_gs_prog(struct brw_context *brw)
>  {
>     struct brw_ff_gs_prog_key key;
> +
> +   if (((brw->state.dirty.mesa & _NEW_LIGHT) |
> +        (brw->state.dirty.brw & (BRW_NEW_PRIMITIVE |
> +                                 BRW_NEW_TRANSFORM_FEEDBACK |
> +                                 BRW_NEW_VS_PROG_DATA))) == 0)
> +   {
> +      return;
> +   }
> +
>     /* Populate the key:
>      */
>     populate_key(brw, &key);
> @@ -247,13 +256,3 @@ void gen6_brw_upload_ff_gs_prog(struct brw_context *brw)
>  {
>     brw_upload_ff_gs_prog(brw);
>  }
> -
> -const struct brw_tracked_state brw_ff_gs_prog = {
> -   .dirty = {
> -      .mesa  = _NEW_LIGHT,
> -      .brw   = BRW_NEW_PRIMITIVE |
> -               BRW_NEW_TRANSFORM_FEEDBACK |
> -               BRW_NEW_VS_PROG_DATA,
> -   },
> -   .emit = brw_upload_ff_gs_prog
> -};
> diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.h b/src/mesa/drivers/dri/i965/brw_ff_gs.h
> index a538948..e4afdab 100644
> --- a/src/mesa/drivers/dri/i965/brw_ff_gs.h
> +++ b/src/mesa/drivers/dri/i965/brw_ff_gs.h
> @@ -112,4 +112,7 @@ void gen6_sol_program(struct brw_ff_gs_compile *c,
>                        unsigned num_verts, bool check_edge_flag);
>  void gen6_brw_upload_ff_gs_prog(struct brw_context *brw);
>  
> +void
> +brw_upload_ff_gs_prog(struct brw_context *brw);
> +
>  #endif
> diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
> index c7ebe5f..0fa06bb 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_gs.c
> @@ -292,8 +292,7 @@ do_gs_prog(struct brw_context *brw,
>     return true;
>  }
>  
> -
> -static void
> +void
>  brw_upload_gs_prog(struct brw_context *brw)
>  {
>     struct gl_context *ctx = &brw->ctx;
> @@ -303,6 +302,14 @@ brw_upload_gs_prog(struct brw_context *brw)
>     struct brw_geometry_program *gp =
>        (struct brw_geometry_program *) brw->geometry_program;
>  
> +   if (((brw->state.dirty.mesa & _NEW_TEXTURE) |
> +        (brw->state.dirty.brw & (BRW_NEW_GEOMETRY_PROGRAM |
> +                                 BRW_NEW_TRANSFORM_FEEDBACK |
> +                                 BRW_NEW_VUE_MAP_VS))) == 0)
> +   {
> +      return;
> +   }
> +
>     if (gp == NULL) {
>        /* No geometry shader.  Vertex data just passes straight through. */
>        if (brw->state.dirty.brw & BRW_NEW_VUE_MAP_VS) {
> @@ -358,18 +365,6 @@ brw_upload_gs_prog(struct brw_context *brw)
>     }
>  }
>  
> -
> -const struct brw_tracked_state brw_gs_prog = {
> -   .dirty = {
> -      .mesa  = _NEW_TEXTURE,
> -      .brw   = BRW_NEW_GEOMETRY_PROGRAM |
> -               BRW_NEW_TRANSFORM_FEEDBACK |
> -               BRW_NEW_VUE_MAP_VS,
> -   },
> -   .emit = brw_upload_gs_prog
> -};
> -
> -
>  bool
>  brw_gs_precompile(struct gl_context *ctx,
>                    struct gl_shader_program *shader_prog,
> diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h
> index 5a15fa9..5f7c437 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs.h
> +++ b/src/mesa/drivers/dri/i965/brw_gs.h
> @@ -26,6 +26,8 @@
>  
>  #include <stdbool.h>
>  
> +#include "brw_context.h"
> +
>  #ifdef __cplusplus
>  extern "C" {
>  #endif
> @@ -36,6 +38,9 @@ struct gl_program;
>  
>  bool brw_gs_prog_data_compare(const void *a, const void *b);
>  
> +void
> +brw_upload_gs_prog(struct brw_context *brw);
> +
>  #ifdef __cplusplus
>  } /* extern "C" */
>  #endif
> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
> index 52d96f4..049bdbee 100644
> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> @@ -36,17 +36,17 @@
>  #include "drivers/common/meta.h"
>  #include "intel_batchbuffer.h"
>  #include "intel_buffers.h"
> +#include "brw_vs.h"
> +#include "brw_ff_gs.h"
> +#include "brw_gs.h"
> +#include "brw_wm.h"
>  
>  static const struct brw_tracked_state *gen4_atoms[] =
>  {
> -   &brw_vs_prog, /* must do before GS prog, state base address. */
> -   &brw_ff_gs_prog, /* must do before state base address */
> -
>     &brw_interpolation_map,
>  
>     &brw_clip_prog, /* must do before state base address */
>     &brw_sf_prog, /* must do before state base address */
> -   &brw_wm_prog, /* must do before state base address */
>  
>     /* Once all the programs are done, we know how large urb entry
>      * sizes need to be and can decide if we need to change the urb
> @@ -107,10 +107,6 @@ static const struct brw_tracked_state *gen4_atoms[] =
>  
>  static const struct brw_tracked_state *gen6_atoms[] =
>  {
> -   &brw_vs_prog, /* must do before state base address */
> -   &brw_gs_prog, /* must do before state base address */
> -   &brw_wm_prog, /* must do before state base address */
> -
>     &gen6_clip_vp,
>     &gen6_sf_vp,
>  
> @@ -180,10 +176,6 @@ static const struct brw_tracked_state *gen6_atoms[] =
>  
>  static const struct brw_tracked_state *gen7_atoms[] =
>  {
> -   &brw_vs_prog,
> -   &brw_gs_prog,
> -   &brw_wm_prog,
> -
>     /* Command packets: */
>  
>     /* must do before binding table pointers, cc state ptrs */
> @@ -256,10 +248,6 @@ static const struct brw_tracked_state *gen7_atoms[] =
>  
>  static const struct brw_tracked_state *gen8_atoms[] =
>  {
> -   &brw_vs_prog,
> -   &brw_gs_prog,
> -   &brw_wm_prog,
> -
>     /* Command packets: */
>     &gen8_state_base_address,
>  
> @@ -562,6 +550,19 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map)
>     }
>  }
>  
> +static void
> +brw_upload_programs(struct brw_context *brw)
> +{
> +   brw_upload_vs_prog(brw);
> +
> +   if (brw->gen < 6)
> +      brw_upload_ff_gs_prog(brw);
> +   else
> +      brw_upload_gs_prog(brw);
> +
> +   brw_upload_wm_prog(brw);
> +}
> +
>  /***********************************************************************
>   * Emit all state:
>   */
> @@ -612,6 +613,8 @@ void brw_upload_state(struct brw_context *brw)
>     if ((state->mesa | state->brw) == 0)
>        return;
>  
> +   brw_upload_programs(brw);
> +
>     if (unlikely(INTEL_DEBUG)) {
>        /* Debug version which enforces various sanity checks on the
>         * state flags which are generated and checked to help ensure
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
> index 2d56b74..a7d2988 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
> @@ -409,8 +409,8 @@ brw_setup_vue_key_clip_info(struct brw_context *brw,
>     }
>  }
>  
> -
> -static void brw_upload_vs_prog(struct brw_context *brw)
> +void
> +brw_upload_vs_prog(struct brw_context *brw)
>  {
>     struct gl_context *ctx = &brw->ctx;
>     struct brw_vs_prog_key key;
> @@ -420,6 +420,18 @@ static void brw_upload_vs_prog(struct brw_context *brw)
>     struct gl_program *prog = (struct gl_program *) brw->vertex_program;
>     int i;
>  
> +   if (((brw->state.dirty.mesa & (_NEW_BUFFERS |
> +                                 _NEW_LIGHT |
> +                                 _NEW_POINT |
> +                                 _NEW_POLYGON |
> +                                 _NEW_TEXTURE |
> +                                 _NEW_TRANSFORM)) |
> +        (brw->state.dirty.brw & (BRW_NEW_VERTEX_PROGRAM |
> +                                 BRW_NEW_VS_ATTRIB_WORKAROUNDS))) == 0)
> +   {
> +      return;
> +   }
> +
>     memset(&key, 0, sizeof(key));
>  
>     /* Just upload the program verbatim for now.  Always send it all
> @@ -482,22 +494,6 @@ static void brw_upload_vs_prog(struct brw_context *brw)
>     }
>  }
>  
> -/* See brw_vs.c:
> - */
> -const struct brw_tracked_state brw_vs_prog = {
> -   .dirty = {
> -      .mesa  = _NEW_BUFFERS |
> -               _NEW_LIGHT |
> -               _NEW_POINT |
> -               _NEW_POLYGON |
> -               _NEW_TEXTURE |
> -               _NEW_TRANSFORM,
> -      .brw   = BRW_NEW_VERTEX_PROGRAM |
> -               BRW_NEW_VS_ATTRIB_WORKAROUNDS,
> -   },
> -   .emit = brw_upload_vs_prog
> -};
> -
>  bool
>  brw_vs_precompile(struct gl_context *ctx,
>                    struct gl_shader_program *shader_prog,
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
> index 93c5389..bad0f07 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.h
> +++ b/src/mesa/drivers/dri/i965/brw_vs.h
> @@ -72,6 +72,9 @@ void brw_vs_debug_recompile(struct brw_context *brw,
>                              const struct brw_vs_prog_key *key);
>  bool brw_vs_prog_data_compare(const void *a, const void *b);
>  
> +void
> +brw_upload_vs_prog(struct brw_context *brw);
> +
>  #ifdef __cplusplus
>  } /* extern "C" */
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
> index e7939f0..eace7f9 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -582,8 +582,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
>     key->program_string_id = fp->id;
>  }
>  
> -
> -static void
> +void
>  brw_upload_wm_prog(struct brw_context *brw)
>  {
>     struct gl_context *ctx = &brw->ctx;
> @@ -591,6 +590,25 @@ brw_upload_wm_prog(struct brw_context *brw)
>     struct brw_fragment_program *fp = (struct brw_fragment_program *)
>        brw->fragment_program;
>  

What about putting this in brw_state.h:

static inline
state_dirty(struct brw_context *brw, uint32_t mesa_bits, uint64_t brw_bits)
{
   return (brw->state.dirty.mesa & mesa_bits) |
          (brw->state.dirty.brw & brw_bits);
}

and then writing this as:

   if (!state_dirty(brw,
                    _NEW_COLOR |
                    _NEW_DEPTH |
                    _NEW_FRAG_CLAMP |
                    _NEW_HINT |
                    _NEW_LIGHT |
                    _NEW_LINE |
                    _NEW_MULTISAMPLE |
                    _NEW_POLYGON |
                    _NEW_STENCIL |
                    _NEW_TEXTURE,
                    BRW_NEW_FRAGMENT_PROGRAM |
                    BRW_NEW_REDUCED_PRIMITIVE |
                    BRW_NEW_STATS_WM |
                    BRW_NEW_VUE_MAP_GEOM_OUT))
      return;

I definitely like how the dirty flag list remains close to the code that
relies on them - in fact, it's even closer now.

With that suggested change, this patch is:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

I could potentially see us moving to this approach in general - instead
of a gen7_atoms list, we could just have a gen7_upload_state() function
that calls everything directly, rather than via a function pointer list.
This could be more efficient, and writing a function directly might give
us a bit more flexibility to pass data between pieces without having to
store everything in brw_context awkwardly.

I'm not suggesting you do extra work - just saying, I like this :)

Jordan may have thoughts as well.

> +   if (((brw->state.dirty.mesa & (_NEW_BUFFERS |
> +                                  _NEW_COLOR |
> +                                  _NEW_DEPTH |
> +                                  _NEW_FRAG_CLAMP |
> +                                  _NEW_HINT |
> +                                  _NEW_LIGHT |
> +                                  _NEW_LINE |
> +                                  _NEW_MULTISAMPLE |
> +                                  _NEW_POLYGON |
> +                                  _NEW_STENCIL |
> +                                  _NEW_TEXTURE)) |
> +        (brw->state.dirty.brw & (BRW_NEW_FRAGMENT_PROGRAM |
> +                                 BRW_NEW_REDUCED_PRIMITIVE |
> +                                 BRW_NEW_STATS_WM |
> +                                 BRW_NEW_VUE_MAP_GEOM_OUT))) == 0)
> +   {
> +      return;
> +   }
> +
>     brw_wm_populate_key(brw, &key);
>  
>     if (!brw_search_cache(&brw->cache, BRW_CACHE_FS_PROG,
> @@ -603,26 +621,3 @@ brw_upload_wm_prog(struct brw_context *brw)
>     }
>     brw->wm.base.prog_data = &brw->wm.prog_data->base;
>  }
> -
> -
> -const struct brw_tracked_state brw_wm_prog = {
> -   .dirty = {
> -      .mesa  = _NEW_BUFFERS |
> -               _NEW_COLOR |
> -               _NEW_DEPTH |
> -               _NEW_FRAG_CLAMP |
> -               _NEW_HINT |
> -               _NEW_LIGHT |
> -               _NEW_LINE |
> -               _NEW_MULTISAMPLE |
> -               _NEW_POLYGON |
> -               _NEW_STENCIL |
> -               _NEW_TEXTURE,
> -      .brw   = BRW_NEW_FRAGMENT_PROGRAM |
> -               BRW_NEW_REDUCED_PRIMITIVE |
> -               BRW_NEW_STATS_WM |
> -               BRW_NEW_VUE_MAP_GEOM_OUT,
> -   },
> -   .emit = brw_upload_wm_prog
> -};
> -
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
> index a12c7d4..f54530f 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.h
> +++ b/src/mesa/drivers/dri/i965/brw_wm.h
> @@ -83,4 +83,7 @@ void brw_wm_debug_recompile(struct brw_context *brw,
>                              const struct brw_wm_prog_key *key);
>  bool brw_wm_prog_data_compare(const void *a, const void *b);
>  
> +void
> +brw_upload_wm_prog(struct brw_context *brw);
> +
>  #endif
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150219/d5cfaa01/attachment.sig>


More information about the mesa-dev mailing list