Mesa (master): i965 Gen6+: De-compact clip planes.

Paul Berry stereotype441 at kemper.freedesktop.org
Fri Oct 7 02:35:24 UTC 2011


Module: Mesa
Branch: master
Commit: 018ea68d8780ab5baeef0b8122b8410e5e55ae6d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=018ea68d8780ab5baeef0b8122b8410e5e55ae6d

Author: Paul Berry <stereotype441 at gmail.com>
Date:   Tue Sep 27 12:57:08 2011 -0700

i965 Gen6+: De-compact clip planes.

Previously, if the user enabled a non-consecutive set of clip planes
(e.g. 0, 1, and 3), the driver would compact them down to a
consecutive set starting at 0.  This optimization was of dubious
value, and complicated the implementation of gl_ClipDistance.

This patch changes the driver so that with Gen6 and later chipsets, we
no longer compact the clip planes.  However, we still discard any clip
planes beyond the highest number that is in use, so performance should
not be affected for applications that use clip planes consecutively
from 0.

With chipsets previous to Gen6, we still compact the clip planes,
since the pre-Gen6 clipper thread relies on this behavior.

Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
Reviewed-by: Eric Anholt <eric at anholt.net>

---

 src/mesa/drivers/dri/i965/brw_state.h          |    5 ---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |   36 ++++++++++++++++--------
 src/mesa/drivers/dri/i965/brw_vs.c             |   17 ++++++++---
 src/mesa/drivers/dri/i965/brw_vs.h             |   15 ++++++----
 src/mesa/drivers/dri/i965/brw_vs_emit.c        |   14 ++++----
 src/mesa/drivers/dri/i965/gen6_clip_state.c    |   28 +-----------------
 src/mesa/drivers/dri/i965/gen7_clip_state.c    |    9 +----
 7 files changed, 56 insertions(+), 68 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index fabf0c0..6fc95eb 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -213,9 +213,4 @@ get_attr_override(struct brw_vue_map *vue_map, int urb_entry_read_offset,
 unsigned int
 gen7_depth_format(struct brw_context *brw);
 
-/* gen6_clip_state.c */
-uint32_t
-brw_compute_userclip_flags(bool uses_clip_distance,
-                           GLbitfield clip_planes_enabled);
-
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 680bd7d..b26700f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -559,18 +559,29 @@ vec4_visitor::setup_uniform_clipplane_values()
 {
    gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
 
+   /* Pre-Gen6, we compact clip planes.  For example, if the user
+    * enables just clip planes 0, 1, and 3, we will enable clip planes
+    * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
+    * plane 2.  This simplifies the implementation of the Gen6 clip
+    * thread.
+    *
+    * In Gen6 and later, we don't compact clip planes, because this
+    * simplifies the implementation of gl_ClipDistance.
+    */
    int compacted_clipplane_index = 0;
-   for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
-      if (c->key.userclip_planes_enabled & (1 << i)) {
-         this->uniform_vector_size[this->uniforms] = 4;
-         this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
-         this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
-         for (int j = 0; j < 4; ++j) {
-            c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
-         }
-         ++compacted_clipplane_index;
-         ++this->uniforms;
+   for (int i = 0; i < c->key.nr_userclip_plane_consts; ++i) {
+      if (intel->gen < 6 &&
+          !(c->key.userclip_planes_enabled_gen_4_5 & (1 << i))) {
+         continue;
+      }
+      this->uniform_vector_size[this->uniforms] = 4;
+      this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
+      this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
+      for (int j = 0; j < 4; ++j) {
+         c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
       }
+      ++compacted_clipplane_index;
+      ++this->uniforms;
    }
 }
 
@@ -1807,7 +1818,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
       }
 
       current_annotation = "Clipping flags";
-      for (i = 0; i < c->key.nr_userclip_planes; i++) {
+      for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
 	 vec4_instruction *inst;
 
 	 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
@@ -1883,7 +1894,8 @@ vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
       clip_vertex = VERT_RESULT_HPOS;
    }
 
-   for (int i = 0; i + offset < c->key.nr_userclip_planes && i < 4; ++i) {
+   for (int i = 0; i + offset < c->key.nr_userclip_plane_consts && i < 4;
+        ++i) {
       emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
                src_reg(output_reg[clip_vertex]),
                src_reg(this->userplane[i + offset])));
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 14e91d8..f671223 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -276,7 +276,8 @@ do_vs_prog(struct brw_context *brw,
 
 static void brw_upload_vs_prog(struct brw_context *brw)
 {
-   struct gl_context *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
    struct brw_vs_prog_key key;
    struct brw_vertex_program *vp = 
       (struct brw_vertex_program *)brw->vertex_program;
@@ -290,10 +291,16 @@ static void brw_upload_vs_prog(struct brw_context *brw)
    key.program_string_id = vp->id;
    key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
    key.uses_clip_distance = vp->program.UsesClipDistance;
-   if (!key.uses_clip_distance) {
-      key.userclip_planes_enabled = ctx->Transform.ClipPlanesEnabled;
-      key.nr_userclip_planes
-         = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
+   if (key.userclip_active && !key.uses_clip_distance) {
+      if (intel->gen < 6) {
+         key.nr_userclip_plane_consts
+            = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
+         key.userclip_planes_enabled_gen_4_5
+            = ctx->Transform.ClipPlanesEnabled;
+      } else {
+         key.nr_userclip_plane_consts
+            = _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
+      }
    }
    key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
 			ctx->Polygon.BackMode != GL_FILL);
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index b8d11df..85a1d82 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -53,10 +53,10 @@ struct brw_vs_prog_key {
    GLuint userclip_active:1;
 
    /**
-    * Number of user clip planes active.  Zero if the shader uses
-    * gl_ClipDistance.
+    * How many user clipping planes are being uploaded to the vertex shader as
+    * push constants.
     */
-   GLuint nr_userclip_planes:4;
+   GLuint nr_userclip_plane_consts:4;
 
    /**
     * True if the shader uses gl_ClipDistance, regardless of whether any clip
@@ -65,10 +65,13 @@ struct brw_vs_prog_key {
    GLuint uses_clip_distance:1;
 
    /**
-    * Which user clip planes are active.  Zero if the shader uses
-    * gl_ClipDistance.
+    * For pre-Gen6 hardware, a bitfield indicating which clipping planes are
+    * enabled.  This is used to compact clip planes.
+    *
+    * For Gen6 and later hardware, clip planes are not compacted, so this
+    * value is zero to avoid provoking unnecessary shader recompiles.
     */
-   GLuint userclip_planes_enabled:MAX_CLIP_PLANES;
+   GLuint userclip_planes_enabled_gen_4_5:MAX_CLIP_PLANES;
 
    GLuint copy_edgeflag:1;
    GLuint point_coord_replace:8;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 8845580..7326b3a 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -204,17 +204,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     */
    if (c->key.userclip_active) {
       if (intel->gen >= 6) {
-	 for (i = 0; i < c->key.nr_userclip_planes; i++) {
+	 for (i = 0; i <= c->key.nr_userclip_plane_consts; i++) {
 	    c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
 						  (i % 2) * 4), 0, 4, 1);
 	 }
-	 reg += ALIGN(c->key.nr_userclip_planes, 2) / 2;
+	 reg += ALIGN(c->key.nr_userclip_plane_consts, 2) / 2;
       } else {
-	 for (i = 0; i < c->key.nr_userclip_planes; i++) {
+	 for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
 	    c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
 						  (i % 2) * 4), 0, 4, 1);
 	 }
-	 reg += (ALIGN(6 + c->key.nr_userclip_planes, 4) / 4) * 2;
+	 reg += (ALIGN(6 + c->key.nr_userclip_plane_consts, 4) / 4) * 2;
       }
 
    }
@@ -239,7 +239,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     */
    if (intel->gen >= 6) {
       /* We can only load 32 regs of push constants. */
-      max_constant = 32 * 2 - c->key.nr_userclip_planes;
+      max_constant = 32 * 2 - c->key.nr_userclip_plane_consts;
    } else {
       max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
    }
@@ -1565,7 +1565,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 
       /* Set the user clip distances in dword 8-15. (m3-4)*/
       if (c->key.userclip_active) {
-	 for (i = 0; i < c->key.nr_userclip_planes; i++) {
+	 for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
 	    struct brw_reg m;
 	    if (i < 4)
 	       m = brw_message_reg(3);
@@ -1593,7 +1593,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 		 header1, brw_imm_ud(0x7ff<<8));
       }
 
-      for (i = 0; i < c->key.nr_userclip_planes; i++) {
+      for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
 	 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
 	 brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
 	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<i));
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index 801b88f..9b36af4 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -31,25 +31,6 @@
 #include "brw_util.h"
 #include "intel_batchbuffer.h"
 
-uint32_t
-brw_compute_userclip_flags(bool uses_clip_distance,
-                           GLbitfield clip_planes_enabled)
-{
-   if (uses_clip_distance) {
-      /* When using gl_ClipDistance, it is up to the shader to decide which
-       * clip distance values to use.
-       */
-      return clip_planes_enabled;
-   } else {
-      /* When using clipping planes, we compact the ones that are in use so
-       * that they are always numbered consecutively from zero, so we need to
-       * enable clipping planes 0 through n-1 in the hardware regardless of
-       * which planes the user has selected.
-       */
-      return (1 << _mesa_bitcount_64(clip_planes_enabled)) - 1;
-   }
-}
-
 static void
 upload_clip_state(struct brw_context *brw)
 {
@@ -58,10 +39,6 @@ upload_clip_state(struct brw_context *brw)
    uint32_t depth_clamp = 0;
    uint32_t provoking, userclip;
 
-   /* BRW_NEW_VERTEX_PROGRAM */
-   struct brw_vertex_program *vp =
-      (struct brw_vertex_program *)brw->vertex_program;
-
    if (!ctx->Transform.DepthClamp)
       depth_clamp = GEN6_CLIP_Z_TEST;
 
@@ -79,8 +56,7 @@ upload_clip_state(struct brw_context *brw)
    }
 
    /* _NEW_TRANSFORM */
-   userclip = brw_compute_userclip_flags(vp->program.UsesClipDistance,
-                                         ctx->Transform.ClipPlanesEnabled);
+   userclip = ctx->Transform.ClipPlanesEnabled;
 
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
@@ -101,7 +77,7 @@ upload_clip_state(struct brw_context *brw)
 const struct brw_tracked_state gen6_clip_state = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM | _NEW_LIGHT,
-      .brw   = BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM,
+      .brw   = BRW_NEW_CONTEXT,
       .cache = 0
    },
    .emit = upload_clip_state,
diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c b/src/mesa/drivers/dri/i965/gen7_clip_state.c
index 5458ce8..2f512f5 100644
--- a/src/mesa/drivers/dri/i965/gen7_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c
@@ -39,10 +39,6 @@ upload_clip_state(struct brw_context *brw)
    /* _NEW_BUFFERS */
    GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
 
-   /* BRW_NEW_VERTEX_PROGRAM */
-   struct brw_vertex_program *vp =
-      (struct brw_vertex_program *)brw->vertex_program;
-
    dw1 |= GEN7_CLIP_EARLY_CULL;
 
    /* _NEW_POLYGON */
@@ -86,8 +82,7 @@ upload_clip_state(struct brw_context *brw)
    }
 
    /* _NEW_TRANSFORM */
-   userclip = brw_compute_userclip_flags(vp->program.UsesClipDistance,
-                                         ctx->Transform.ClipPlanesEnabled);
+   userclip = ctx->Transform.ClipPlanesEnabled;
 
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
@@ -111,7 +106,7 @@ const struct brw_tracked_state gen7_clip_state = {
                 _NEW_POLYGON |
                 _NEW_LIGHT |
                 _NEW_TRANSFORM),
-      .brw   = BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM,
+      .brw   = BRW_NEW_CONTEXT,
       .cache = 0
    },
    .emit = upload_clip_state,




More information about the mesa-commit mailing list