[Mesa-dev] [PATCH 4/7] i965 new VS: don't share clip plane constants in pre-GEN6

Paul Berry stereotype441 at gmail.com
Sun Sep 25 09:22:01 PDT 2011


In pre-GEN6, when using clip planes, both the vertex shader and the
clipper need access to the client-supplied clip planes, since the
vertex shader needs them to set the clip flags, and the clipper needs
them to determine where to insert new vertices.

With the old VS backend, we used a clever optimization to avoid
placing duplicate copies of these planes in the CURBE: we used the
same block of memory for both the clipper and vertex shader constants,
with the clip planes at the front of it, and then we instructed the
clipper to read just the initial part of this block containing the
clip planes.

This optimization was tricky, of dubious value, and not completely
working in the new VS backend, so I've removed it.  Now, when using
the new VS backend, separate parts of the CURBE are used for the
clipper and the vertex shader.  Note that this doesn't affect the
number of push constants available to the vertex shader, it simply
causes the CURBE to occupy a few more bytes of URB memory.

The old VS backend is unaffected.  GEN6+, which does clipping entirely
in hardware, is also unaffected.
---
 src/mesa/drivers/dri/i965/brw_curbe.c          |    9 ++++-
 src/mesa/drivers/dri/i965/brw_vec4.h           |    2 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp    |   21 ++-----------
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |   22 +++++++++++++-
 src/mesa/drivers/dri/i965/brw_vs_state.c       |    7 ++--
 src/mesa/drivers/dri/i965/gen6_vs_state.c      |   37 ++++++++++++-----------
 6 files changed, 55 insertions(+), 43 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 2ee2b46..e1676de 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -208,8 +208,13 @@ static void prepare_constant_buffer(struct brw_context *brw)
    }
 
 
-   /* The clipplanes are actually delivered to both CLIP and VS units.
-    * VS uses them to calculate the outcode bitmasks.
+   /* When using the old VS backend, the clipplanes are actually delivered to
+    * both CLIP and VS units.  VS uses them to calculate the outcode bitmasks.
+    *
+    * When using the new VS backend, it is responsible for setting up its own
+    * clipplane constants if it needs them.  This results in a slight waste of
+    * curbe space, but the advantage is that the new VS backend can use its
+    * general-purpose uniform layout code to store the clipplanes.
     */
    if (brw->curbe.clip_size) {
       GLuint offset = brw->curbe.clip_start * 16;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 876a691..b6864c3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -324,6 +324,7 @@ public:
    int first_non_payload_grf;
    int *virtual_grf_def;
    int *virtual_grf_use;
+   dst_reg userplane[MAX_CLIP_PLANES];
 
    /**
     * This is the size to be used for an array with an element per
@@ -385,6 +386,7 @@ public:
    void fail(const char *msg, ...);
 
    int virtual_grf_alloc(int size);
+   void setup_uniform_clipplane_values();
    int setup_uniform_values(int loc, const glsl_type *type);
    void setup_builtin_uniform_values(ir_variable *ir);
    int setup_attributes(int payload_reg);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index b58ebc6..1eb8f3f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -91,24 +91,6 @@ vec4_visitor::setup_attributes(int payload_reg)
 int
 vec4_visitor::setup_uniforms(int reg)
 {
-   /* User clip planes from curbe:
-    */
-   if (c->key.nr_userclip && !c->key.uses_clip_distance) {
-      if (intel->gen >= 6) {
-	 for (int i = 0; i < c->key.nr_userclip; i++) {
-	    c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
-						  (i % 2) * 4), 0, 4, 1);
-	 }
-	 reg += ALIGN(c->key.nr_userclip, 2) / 2;
-      } else {
-	 for (int i = 0; i < c->key.nr_userclip; i++) {
-	    c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
-						  (i % 2) * 4), 0, 4, 1);
-	 }
-	 reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
-      }
-   }
-
    /* The pre-gen6 VS requires that some push constants get loaded no
     * matter what, or the GPU would hang.
     */
@@ -598,6 +580,9 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
 bool
 vec4_visitor::run()
 {
+   if (c->key.nr_userclip && !c->key.uses_clip_distance)
+      setup_uniform_clipplane_values();
+
    /* Generate VS IR for main().  (the visitor only descends into
     * functions called "main").
     */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index a32451f..ee3b2a8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -543,6 +543,24 @@ vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
    }
 }
 
+void
+vec4_visitor::setup_uniform_clipplane_values()
+{
+   int compacted_clipplane_index = 0;
+   for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
+      if (ctx->Transform.ClipPlanesEnabled & (1 << i)) {
+         this->uniform_vector_size[this->uniforms] = 4;
+         this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
+         this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
+         for (int j = 0; j < 4; ++j) {
+            c->prog_data.param[this->uniforms * 4 + j] = &ctx->Transform._ClipUserPlane[i][j];
+         }
+         ++compacted_clipplane_index;
+         ++this->uniforms;
+      }
+   }
+}
+
 /* Our support for builtin uniforms is even scarier than non-builtin.
  * It sits on top of the PROG_STATE_VAR parameters that are
  * automatically updated from GL context state.
@@ -1767,7 +1785,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
 	 vec4_instruction *inst;
 
 	 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
-                         src_reg(c->userplane[i])));
+                         src_reg(this->userplane[i])));
 	 inst->conditional_mod = BRW_CONDITIONAL_L;
 
 	 emit(OR(header1, src_reg(header1), 1u << i));
@@ -1825,7 +1843,7 @@ vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
    for (int i = 0; i + offset < c->key.nr_userclip && i < 4; ++i) {
       emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
                src_reg(output_reg[VERT_RESULT_HPOS]),
-               src_reg(c->userplane[i + offset])));
+               src_reg(this->userplane[i + offset])));
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index a01b614..8d86c12 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -92,8 +92,8 @@ brw_prepare_vs_unit(struct brw_context *brw)
    vs->thread3.dispatch_grf_start_reg = 1;
    vs->thread3.urb_entry_read_offset = 0;
 
-   /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
-   if (ctx->Transform.ClipPlanesEnabled) {
+   /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM, BRW_NEW_VERTEX_PROGRAM */
+   if (ctx->Transform.ClipPlanesEnabled && !brw->vs.prog_data->uses_new_param_layout) {
       /* Note that we read in the userclip planes as well, hence
        * clip_start:
        */
@@ -177,7 +177,8 @@ const struct brw_tracked_state brw_vs_unit = {
 		BRW_NEW_PROGRAM_CACHE |
 		BRW_NEW_CURBE_OFFSETS |
                 BRW_NEW_NR_VS_SURFACES |
-		BRW_NEW_URB_FENCE),
+		BRW_NEW_URB_FENCE |
+                BRW_NEW_VERTEX_PROGRAM),
       .cache = CACHE_NEW_VS_PROG
    },
    .prepare = brw_prepare_vs_unit,
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index afb4ace..0f6f6a7 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -66,24 +66,6 @@ gen6_prepare_vs_push_constants(struct brw_context *brw)
 			      4 * sizeof(float),
 			      32, &brw->vs.push_const_offset);
 
-      /* This should be loaded like any other param, but it's ad-hoc
-       * until we redo the VS backend.
-       */
-      if (!uses_clip_distance) {
-         for (i = 0; i < MAX_CLIP_PLANES; i++) {
-            if (ctx->Transform.ClipPlanesEnabled & (1 << i)) {
-               memcpy(param, ctx->Transform._ClipUserPlane[i], 4 * sizeof(float));
-               param += 4;
-               params_uploaded++;
-            }
-         }
-      }
-      /* Align to a reg for convenience for brw_vs_emit.c */
-      if (params_uploaded & 1) {
-	 param += 4;
-	 params_uploaded++;
-      }
-
       if (brw->vs.prog_data->uses_new_param_layout) {
 	 for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
 	    *param = *brw->vs.prog_data->param[i];
@@ -91,6 +73,25 @@ gen6_prepare_vs_push_constants(struct brw_context *brw)
 	 }
 	 params_uploaded += brw->vs.prog_data->nr_params / 4;
       } else {
+         /* This should be loaded like any other param, but it's ad-hoc
+          * until we redo the VS backend.
+          */
+         if (!uses_clip_distance) {
+            for (i = 0; i < MAX_CLIP_PLANES; i++) {
+               if (ctx->Transform.ClipPlanesEnabled & (1 << i)) {
+                  memcpy(param, ctx->Transform._ClipUserPlane[i], 4 * sizeof(float));
+                  param += 4;
+                  params_uploaded++;
+               }
+            }
+         }
+
+         /* Align to a reg for convenience for brw_vs_emit.c */
+         if (params_uploaded & 1) {
+            param += 4;
+            params_uploaded++;
+         }
+
 	 for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
 	    if (brw->vs.constant_map[i] != -1) {
 	       memcpy(param + brw->vs.constant_map[i] * 4,
-- 
1.7.6.2



More information about the mesa-dev mailing list