[Mesa-dev] [PATCH 4/6] i965: Make brw_compute_vue_map's userclip dependency a boolean.

Ian Romanick idr at freedesktop.org
Tue Oct 4 10:54:21 PDT 2011


On 10/03/2011 03:11 PM, Paul Berry wrote:
> Previously, brw_compute_vue_map required an argument indicating the
> number of clip planes in use, but all it did with it was check if it
> was nonzero.
>
> This patch changes brw_compute_vue_map to take a boolean instead.
> This allows us to avoid some unnecessary recompilation of the Gen4/5
> GS and SF threads.
> ---
>   src/mesa/drivers/dri/i965/brw_clip.c           |    2 +-
>   src/mesa/drivers/dri/i965/brw_context.h        |    3 ++-
>   src/mesa/drivers/dri/i965/brw_gs.c             |    4 ++--
>   src/mesa/drivers/dri/i965/brw_gs.h             |    3 +--
>   src/mesa/drivers/dri/i965/brw_sf.c             |    4 ++--
>   src/mesa/drivers/dri/i965/brw_sf.h             |    3 +--
>   src/mesa/drivers/dri/i965/brw_vec4_emit.cpp    |    2 +-
>   src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |    4 ++--
>   src/mesa/drivers/dri/i965/brw_vs.c             |    6 ++++--
>   src/mesa/drivers/dri/i965/brw_vs.h             |    6 ++++++
>   src/mesa/drivers/dri/i965/brw_vs_emit.c        |   10 +++++-----
>   src/mesa/drivers/dri/i965/gen6_sf_state.c      |    6 +++---
>   src/mesa/drivers/dri/i965/gen7_sf_state.c      |    4 ++--
>   13 files changed, 32 insertions(+), 25 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
> index 2eb6044..fde3472 100644
> --- a/src/mesa/drivers/dri/i965/brw_clip.c
> +++ b/src/mesa/drivers/dri/i965/brw_clip.c
> @@ -69,7 +69,7 @@ static void compile_clip_prog( struct brw_context *brw,
>      c.func.single_program_flow = 1;
>
>      c.key = *key;
> -   brw_compute_vue_map(&c.vue_map, intel, c.key.nr_userclip, c.key.attrs);
> +   brw_compute_vue_map(&c.vue_map, intel, c.key.nr_userclip>  0, c.key.attrs);
>
>      /* nr_regs is the number of registers filled by reading data from the VUE.
>       * This program accesses the entire VUE, so nr_regs needs to be the size of
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 7c5bfd0..c81713a 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -965,7 +965,8 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
>
>   /* brw_vs.c */
>   void brw_compute_vue_map(struct brw_vue_map *vue_map,
> -                         const struct intel_context *intel, int nr_userclip,
> +                         const struct intel_context *intel,
> +                         bool userclip_active,
>                            GLbitfield64 outputs_written);
>   GLclipplane *brw_select_clip_planes(struct gl_context *ctx);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
> index 0a37485..3b56aae 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_gs.c
> @@ -64,7 +64,7 @@ static void compile_gs_prog( struct brw_context *brw,
>      c.key = *key;
>      /* The geometry shader needs to access the entire VUE. */
>      struct brw_vue_map vue_map;
> -   brw_compute_vue_map(&vue_map, intel, c.key.nr_userclip, c.key.attrs);
> +   brw_compute_vue_map(&vue_map, intel, c.key.userclip_active, c.key.attrs);
>      c.nr_regs = (vue_map.num_slots + 1)/2;
>
>      mem_ctx = NULL;
> @@ -159,7 +159,7 @@ static void populate_key( struct brw_context *brw,
>      }
>
>      /* _NEW_TRANSFORM */
> -   key->nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
> +   key->userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
>
>      key->need_gs_prog = (intel->gen>= 6)
>         ? 0
> diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h
> index d8637c8..cee7467 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs.h
> +++ b/src/mesa/drivers/dri/i965/brw_gs.h
> @@ -44,8 +44,7 @@ struct brw_gs_prog_key {
>      GLuint primitive:4;
>      GLuint pv_first:1;
>      GLuint need_gs_prog:1;
> -   GLuint nr_userclip:4;
> -   GLuint pad:22;
> +   GLuint userclip_active:1;
>   };
>
>   struct brw_gs_compile {
> diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
> index 4e0434a..7a0cd92 100644
> --- a/src/mesa/drivers/dri/i965/brw_sf.c
> +++ b/src/mesa/drivers/dri/i965/brw_sf.c
> @@ -63,7 +63,7 @@ static void compile_sf_prog( struct brw_context *brw,
>      brw_init_compile(brw,&c.func, mem_ctx);
>
>      c.key = *key;
> -   brw_compute_vue_map(&c.vue_map, intel, c.key.nr_userclip, c.key.attrs);
> +   brw_compute_vue_map(&c.vue_map, intel, c.key.userclip_active, c.key.attrs);
>      c.urb_entry_read_offset = brw_sf_compute_urb_entry_read_offset(intel);
>      c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
>      c.nr_setup_regs = c.nr_attr_regs;
> @@ -154,7 +154,7 @@ static void upload_sf_prog(struct brw_context *brw)
>      }
>
>      /* _NEW_TRANSFORM */
> -   key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
> +   key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
>
>      /* _NEW_POINT */
>      key.do_point_sprite = ctx->Point.PointSprite;
> diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h
> index f39ad27..8de5f5a 100644
> --- a/src/mesa/drivers/dri/i965/brw_sf.h
> +++ b/src/mesa/drivers/dri/i965/brw_sf.h
> @@ -53,8 +53,7 @@ struct brw_sf_prog_key {
>      GLuint frontface_ccw:1;
>      GLuint do_point_sprite:1;
>      GLuint sprite_origin_lower_left:1;
> -   GLuint nr_userclip:4;
> -   GLuint pad:20;
> +   GLuint userclip_active:1;

In the past we've had a bunch of pad elements to document how many bits 
were left.  I've never been too fond of that as it creates extra 
maintenance.  However, others might care, and this change is easy to 
miss in the rest of the patch.

>   };
>
>   struct brw_sf_compile {
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> index 5cbbda8..ccbad25 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> @@ -595,7 +595,7 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
>   bool
>   vec4_visitor::run()
>   {
> -   if (c->key.nr_userclip&&  !c->key.uses_clip_distance)
> +   if (c->key.userclip_active&&  !c->key.uses_clip_distance)
>         setup_uniform_clipplane_values();
>
>      /* Generate VS IR for main().  (the visitor only descends into
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index dd48325..5d75946 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -1790,7 +1790,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
>   {
>      if (intel->gen<  6&&
>          ((c->prog_data.outputs_written&  BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
> -        c->key.nr_userclip || brw->has_negative_rhw_bug)) {
> +        c->key.userclip_active || brw->has_negative_rhw_bug)) {
>         dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
>         dst_reg header1_w = header1;
>         header1_w.writemask = WRITEMASK_W;
> @@ -1996,7 +1996,7 @@ vec4_visitor::emit_urb_writes()
>
>      /* FINISHME: edgeflag */
>
> -   brw_compute_vue_map(&c->vue_map, intel, c->key.nr_userclip,
> +   brw_compute_vue_map(&c->vue_map, intel, c->key.userclip_active,
>                          c->prog_data.outputs_written);
>
>      /* First mrf is the g0-based message header containing URB handles and such,
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
> index 96955f5..97a4c39 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
> @@ -55,7 +55,8 @@ static inline void assign_vue_slot(struct brw_vue_map *vue_map,
>    */
>   void
>   brw_compute_vue_map(struct brw_vue_map *vue_map,
> -                    const struct intel_context *intel, int nr_userclip,
> +                    const struct intel_context *intel,
> +                    bool userclip_active,
>                       GLbitfield64 outputs_written)
>   {
>      int i;
> @@ -112,7 +113,7 @@ brw_compute_vue_map(struct brw_vue_map *vue_map,
>          */
>         assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
>         assign_vue_slot(vue_map, VERT_RESULT_HPOS);
> -      if (nr_userclip) {
> +      if (userclip_active) {
>            assign_vue_slot(vue_map, VERT_RESULT_CLIP_DIST0);
>            assign_vue_slot(vue_map, VERT_RESULT_CLIP_DIST1);
>         }
> @@ -287,6 +288,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
>       * the inputs it asks for, whether they are varying or not.
>       */
>      key.program_string_id = vp->id;
> +   key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
>      key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
>      key.uses_clip_distance = vp->program.UsesClipDistance;
>      if (!key.uses_clip_distance)
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
> index a12b139..bd81764 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.h
> +++ b/src/mesa/drivers/dri/i965/brw_vs.h
> @@ -47,6 +47,12 @@ struct brw_vs_prog_key {
>      uint8_t gl_fixed_input_size[VERT_ATTRIB_MAX];
>
>      /**
> +    * True if at least one clip flag is enabled, regardless of whether the
> +    * shader uses clip planes or gl_ClipDistance.
> +    */
> +   GLuint userclip_active:1;
> +
> +   /**
>       * Number of user clip planes (or clip distances) that are active.
>       */
>      GLuint nr_userclip:4;
> diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
> index 4f7a442..e78cb6c 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
> @@ -202,7 +202,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
>
>      /* User clip planes from curbe:
>       */
> -   if (c->key.nr_userclip) {
> +   if (c->key.userclip_active) {
>         if (intel->gen>= 6) {
>   	 for (i = 0; i<  c->key.nr_userclip; i++) {
>   	    c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
> @@ -325,7 +325,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
>
>      /* Allocate outputs.  The non-position outputs go straight into message regs.
>       */
> -   brw_compute_vue_map(&c->vue_map, intel, c->key.nr_userclip,
> +   brw_compute_vue_map(&c->vue_map, intel, c->key.userclip_active,
>                          c->prog_data.outputs_written);
>      c->first_output = reg;
>
> @@ -1564,7 +1564,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
>         }
>
>         /* Set the user clip distances in dword 8-15. (m3-4)*/
> -      if (c->key.nr_userclip) {
> +      if (c->key.userclip_active) {
>   	 for (i = 0; i<  c->key.nr_userclip; i++) {
>   	    struct brw_reg m;
>   	    if (i<  4)
> @@ -1577,7 +1577,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
>         }
>      } else if ((c->prog_data.outputs_written&
>   	BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
> -	      c->key.nr_userclip || brw->has_negative_rhw_bug) {
> +	      c->key.userclip_active || brw->has_negative_rhw_bug) {
>         struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
>         GLuint i;
>
> @@ -1649,7 +1649,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
>          */
>         brw_MOV(p, brw_message_reg(2), pos);
>         len_vertex_header = 1;
> -      if (c->key.nr_userclip>  0)
> +      if (c->key.userclip_active)
>   	 len_vertex_header += 2;
>      } else if (intel->gen == 5) {
>         /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
> diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
> index 3eca786..c8068d1 100644
> --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
> @@ -126,12 +126,12 @@ upload_sf_state(struct brw_context *brw)
>      int urb_entry_read_offset = 1;
>      float point_size;
>      uint16_t attr_overrides[FRAG_ATTRIB_MAX];
> -   int nr_userclip;
> +   bool userclip_active;
>
>      /* _NEW_TRANSFORM */
> -   nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
> +   userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
>
> -   brw_compute_vue_map(&vue_map, intel, nr_userclip, vs_outputs_written);
> +   brw_compute_vue_map(&vue_map, intel, userclip_active, vs_outputs_written);
>      urb_entry_read_length = (vue_map.num_slots + 1)/2 - urb_entry_read_offset;
>      if (urb_entry_read_length == 0) {
>         /* Setting the URB entry read length to 0 causes undefined behavior, so
> diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c
> index 75dc6da..52cccba 100644
> --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
> @@ -44,10 +44,10 @@ upload_sbe_state(struct brw_context *brw)
>      int attr = 0, input_index = 0;
>      /* _NEW_TRANSFORM */
>      int urb_entry_read_offset = 1;
> -   int nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
> +   bool userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
>      uint16_t attr_overrides[FRAG_ATTRIB_MAX];
>
> -   brw_compute_vue_map(&vue_map, intel, nr_userclip, vs_outputs_written);
> +   brw_compute_vue_map(&vue_map, intel, userclip_active, vs_outputs_written);
>      urb_entry_read_length = (vue_map.num_slots + 1)/2 - urb_entry_read_offset;
>      if (urb_entry_read_length == 0) {
>         /* Setting the URB entry read length to 0 causes undefined behavior, so



More information about the mesa-dev mailing list