[Mesa-dev] [PATCH 6/6] i965 Gen6+: De-compact clip planes.

Ian Romanick idr at freedesktop.org
Tue Oct 4 10:54:23 PDT 2011


On 10/03/2011 03:11 PM, Paul Berry wrote:
> Previously, if the user enabled a non-consecutive set of clip planes
> (e.g. 0, 1, and 3), the driver would compact them down to a
> consecutive set starting at 0.  This optimization was of dubious
> value, and complicated the implementation of gl_ClipDistance.
>
> This patch changes the driver so that with Gen6 and later chipsets, we
> no longer compact the clip planes.  However, we still discard any clip
> planes beyond the highest number that is in use, so performance should
> not be affected for applications that use clip planes consecutively
> from 0.
>
> With chipsets previous to Gen6, we still compact the clip planes,
> since the pre-Gen6 clipper thread relies on this behavior.

Unless we know of some specific cases where a non-consecutive set of 
clip planes is used and performance would be adversely affected by this 
change, I'm in favor of removing this optimization.

The code gets a bit ugly due to treating GEN6 and GEN4/5 differently. 
How much work would it be fix the clipping on those chipsets to not need 
the compaction?  (Based on previous discussions with Eric about the 
clipper, I suspect the answer is, "A lot.")

> ---
>   src/mesa/drivers/dri/i965/brw_state.h          |    5 ---
>   src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |   36 ++++++++++++++++--------
>   src/mesa/drivers/dri/i965/brw_vs.c             |   17 ++++++++---
>   src/mesa/drivers/dri/i965/brw_vs.h             |   15 ++++++----
>   src/mesa/drivers/dri/i965/brw_vs_emit.c        |   14 ++++----
>   src/mesa/drivers/dri/i965/gen6_clip_state.c    |   28 +-----------------
>   src/mesa/drivers/dri/i965/gen7_clip_state.c    |    9 +----
>   7 files changed, 56 insertions(+), 68 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
> index fabf0c0..6fc95eb 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -213,9 +213,4 @@ get_attr_override(struct brw_vue_map *vue_map, int urb_entry_read_offset,
>   unsigned int
>   gen7_depth_format(struct brw_context *brw);
>
> -/* gen6_clip_state.c */
> -uint32_t
> -brw_compute_userclip_flags(bool uses_clip_distance,
> -                           GLbitfield clip_planes_enabled);
> -
>   #endif
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index 3c2f9ba..a117c37 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -559,18 +559,29 @@ vec4_visitor::setup_uniform_clipplane_values()
>   {
>      GLclipplane *clip_planes = brw_select_clip_planes(ctx);
>
> +   /* Pre-Gen6, we compact clip planes.  For example, if the user
> +    * enables just clip planes 0, 1, and 3, we will enable clip planes
> +    * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
> +    * plane 2.  This simplifies the implementation of the Gen6 clip
> +    * thread.
> +    *
> +    * In Gen6 and later, we don't compact clip planes, because this
> +    * simplifies the implementation of gl_ClipDistance.
> +    */
>      int compacted_clipplane_index = 0;
> -   for (int i = 0; i<  MAX_CLIP_PLANES; ++i) {
> -      if (c->key.userclip_planes_enabled&  (1<<  i)) {
> -         this->uniform_vector_size[this->uniforms] = 4;
> -         this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
> -         this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
> -         for (int j = 0; j<  4; ++j) {
> -            c->prog_data.param[this->uniforms * 4 + j] =&clip_planes[i][j];
> -         }
> -         ++compacted_clipplane_index;
> -         ++this->uniforms;
> +   for (int i = 0; i<  c->key.nr_userclip_plane_consts; ++i) {
> +      if (intel->gen<  6&&
> +          !(c->key.userclip_planes_enabled_gen_4_5&  (1<<  i))) {
> +         continue;
> +      }
> +      this->uniform_vector_size[this->uniforms] = 4;
> +      this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
> +      this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
> +      for (int j = 0; j<  4; ++j) {
> +         c->prog_data.param[this->uniforms * 4 + j] =&clip_planes[i][j];
>         }
> +      ++compacted_clipplane_index;
> +      ++this->uniforms;
>      }
>   }
>
> @@ -1807,7 +1818,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
>         }
>
>         current_annotation = "Clipping flags";
> -      for (i = 0; i<  c->key.nr_userclip_planes; i++) {
> +      for (i = 0; i<  c->key.nr_userclip_plane_consts; i++) {
>   	 vec4_instruction *inst;
>
>   	 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
> @@ -1883,7 +1894,8 @@ vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
>         clip_vertex = VERT_RESULT_HPOS;
>      }
>
> -   for (int i = 0; i + offset<  c->key.nr_userclip_planes&&  i<  4; ++i) {
> +   for (int i = 0; i + offset<  c->key.nr_userclip_plane_consts&&  i<  4;
> +        ++i) {
>         emit(DP4(dst_reg(brw_writemask(reg, 1<<  i)),
>                  src_reg(output_reg[clip_vertex]),
>                  src_reg(this->userplane[i + offset])));
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
> index 19d9552..7e7a230 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
> @@ -276,7 +276,8 @@ do_vs_prog(struct brw_context *brw,
>
>   static void brw_upload_vs_prog(struct brw_context *brw)
>   {
> -   struct gl_context *ctx =&brw->intel.ctx;
> +   struct intel_context *intel =&brw->intel;
> +   struct gl_context *ctx =&intel->ctx;
>      struct brw_vs_prog_key key;
>      struct brw_vertex_program *vp =
>         (struct brw_vertex_program *)brw->vertex_program;
> @@ -290,10 +291,16 @@ static void brw_upload_vs_prog(struct brw_context *brw)
>      key.program_string_id = vp->id;
>      key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
>      key.uses_clip_distance = vp->program.UsesClipDistance;
> -   if (!key.uses_clip_distance) {
> -      key.userclip_planes_enabled = ctx->Transform.ClipPlanesEnabled;
> -      key.nr_userclip_planes
> -         = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
> +   if (key.userclip_active&&  !key.uses_clip_distance) {
> +      if (intel->gen<  6) {
> +         key.nr_userclip_plane_consts
> +            = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
> +         key.userclip_planes_enabled_gen_4_5
> +            = ctx->Transform.ClipPlanesEnabled;
> +      } else {
> +         key.nr_userclip_plane_consts
> +            = _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
> +      }
>      }
>      key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
>   			ctx->Polygon.BackMode != GL_FILL);
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
> index b8d11df..85a1d82 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.h
> +++ b/src/mesa/drivers/dri/i965/brw_vs.h
> @@ -53,10 +53,10 @@ struct brw_vs_prog_key {
>      GLuint userclip_active:1;
>
>      /**
> -    * Number of user clip planes active.  Zero if the shader uses
> -    * gl_ClipDistance.
> +    * How many user clipping planes are being uploaded to the vertex shader as
> +    * push constants.
>       */
> -   GLuint nr_userclip_planes:4;
> +   GLuint nr_userclip_plane_consts:4;
>
>      /**
>       * True if the shader uses gl_ClipDistance, regardless of whether any clip
> @@ -65,10 +65,13 @@ struct brw_vs_prog_key {
>      GLuint uses_clip_distance:1;
>
>      /**
> -    * Which user clip planes are active.  Zero if the shader uses
> -    * gl_ClipDistance.
> +    * For pre-Gen6 hardware, a bitfield indicating which clipping planes are
> +    * enabled.  This is used to compact clip planes.
> +    *
> +    * For Gen6 and later hardware, clip planes are not compacted, so this
> +    * value is zero to avoid provoking unnecessary shader recompiles.
>       */
> -   GLuint userclip_planes_enabled:MAX_CLIP_PLANES;
> +   GLuint userclip_planes_enabled_gen_4_5:MAX_CLIP_PLANES;
>
>      GLuint copy_edgeflag:1;
>      GLuint point_coord_replace:8;
> diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
> index 8845580..7326b3a 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
> @@ -204,17 +204,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
>       */
>      if (c->key.userclip_active) {
>         if (intel->gen>= 6) {
> -	 for (i = 0; i<  c->key.nr_userclip_planes; i++) {
> +	 for (i = 0; i<= c->key.nr_userclip_plane_consts; i++) {
>   	    c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
>   						  (i % 2) * 4), 0, 4, 1);
>   	 }
> -	 reg += ALIGN(c->key.nr_userclip_planes, 2) / 2;
> +	 reg += ALIGN(c->key.nr_userclip_plane_consts, 2) / 2;
>         } else {
> -	 for (i = 0; i<  c->key.nr_userclip_planes; i++) {
> +	 for (i = 0; i<  c->key.nr_userclip_plane_consts; i++) {
>   	    c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
>   						  (i % 2) * 4), 0, 4, 1);
>   	 }
> -	 reg += (ALIGN(6 + c->key.nr_userclip_planes, 4) / 4) * 2;
> +	 reg += (ALIGN(6 + c->key.nr_userclip_plane_consts, 4) / 4) * 2;
>         }
>
>      }
> @@ -239,7 +239,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
>       */
>      if (intel->gen>= 6) {
>         /* We can only load 32 regs of push constants. */
> -      max_constant = 32 * 2 - c->key.nr_userclip_planes;
> +      max_constant = 32 * 2 - c->key.nr_userclip_plane_consts;
>      } else {
>         max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
>      }
> @@ -1565,7 +1565,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
>
>         /* Set the user clip distances in dword 8-15. (m3-4)*/
>         if (c->key.userclip_active) {
> -	 for (i = 0; i<  c->key.nr_userclip_planes; i++) {
> +	 for (i = 0; i<  c->key.nr_userclip_plane_consts; i++) {
>   	    struct brw_reg m;
>   	    if (i<  4)
>   	       m = brw_message_reg(3);
> @@ -1593,7 +1593,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
>   		 header1, brw_imm_ud(0x7ff<<8));
>         }
>
> -      for (i = 0; i<  c->key.nr_userclip_planes; i++) {
> +      for (i = 0; i<  c->key.nr_userclip_plane_consts; i++) {
>   	 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
>   	 brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
>   	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<i));
> diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
> index ffe2c53..9b36af4 100644
> --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
> @@ -31,25 +31,6 @@
>   #include "brw_util.h"
>   #include "intel_batchbuffer.h"
>
> -uint32_t
> -brw_compute_userclip_flags(bool uses_clip_distance,
> -                           GLbitfield clip_planes_enabled)
> -{
> -   if (uses_clip_distance) {
> -      /* When using gl_ClipDistance, it is up to the shader to decide which
> -       * clip distance values to use.
> -       */
> -      return clip_planes_enabled;
> -   } else {
> -      /* When using clipping planes, we compact the ones that are in use so
> -       * that they are always numbered consecutively from zero, so we need to
> -       * enable clipping planes 0 through n-1 in the hardware regardless of
> -       * which planes the user has selected.
> -       */
> -      return (1<<  brw_count_bits(clip_planes_enabled)) - 1;
> -   }
> -}
> -
>   static void
>   upload_clip_state(struct brw_context *brw)
>   {
> @@ -58,10 +39,6 @@ upload_clip_state(struct brw_context *brw)
>      uint32_t depth_clamp = 0;
>      uint32_t provoking, userclip;
>
> -   /* BRW_NEW_VERTEX_PROGRAM */
> -   struct brw_vertex_program *vp =
> -      (struct brw_vertex_program *)brw->vertex_program;
> -
>      if (!ctx->Transform.DepthClamp)
>         depth_clamp = GEN6_CLIP_Z_TEST;
>
> @@ -79,8 +56,7 @@ upload_clip_state(struct brw_context *brw)
>      }
>
>      /* _NEW_TRANSFORM */
> -   userclip = brw_compute_userclip_flags(vp->program.UsesClipDistance,
> -                                         ctx->Transform.ClipPlanesEnabled);
> +   userclip = ctx->Transform.ClipPlanesEnabled;
>
>      BEGIN_BATCH(4);
>      OUT_BATCH(_3DSTATE_CLIP<<  16 | (4 - 2));
> @@ -101,7 +77,7 @@ upload_clip_state(struct brw_context *brw)
>   const struct brw_tracked_state gen6_clip_state = {
>      .dirty = {
>         .mesa  = _NEW_TRANSFORM | _NEW_LIGHT,
> -      .brw   = BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM,
> +      .brw   = BRW_NEW_CONTEXT,
>         .cache = 0
>      },
>      .emit = upload_clip_state,
> diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c b/src/mesa/drivers/dri/i965/gen7_clip_state.c
> index 5458ce8..2f512f5 100644
> --- a/src/mesa/drivers/dri/i965/gen7_clip_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c
> @@ -39,10 +39,6 @@ upload_clip_state(struct brw_context *brw)
>      /* _NEW_BUFFERS */
>      GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
>
> -   /* BRW_NEW_VERTEX_PROGRAM */
> -   struct brw_vertex_program *vp =
> -      (struct brw_vertex_program *)brw->vertex_program;
> -
>      dw1 |= GEN7_CLIP_EARLY_CULL;
>
>      /* _NEW_POLYGON */
> @@ -86,8 +82,7 @@ upload_clip_state(struct brw_context *brw)
>      }
>
>      /* _NEW_TRANSFORM */
> -   userclip = brw_compute_userclip_flags(vp->program.UsesClipDistance,
> -                                         ctx->Transform.ClipPlanesEnabled);
> +   userclip = ctx->Transform.ClipPlanesEnabled;
>
>      BEGIN_BATCH(4);
>      OUT_BATCH(_3DSTATE_CLIP<<  16 | (4 - 2));
> @@ -111,7 +106,7 @@ const struct brw_tracked_state gen7_clip_state = {
>                   _NEW_POLYGON |
>                   _NEW_LIGHT |
>                   _NEW_TRANSFORM),
> -      .brw   = BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM,
> +      .brw   = BRW_NEW_CONTEXT,
>         .cache = 0
>      },
>      .emit = upload_clip_state,



More information about the mesa-dev mailing list