[Mesa-dev] [PATCH] i965: Fix Vertex URB Read Length calculation in 3DSTATE_SF on Gen6.

Kenneth Graunke kenneth at whitecape.org
Sat Feb 2 03:44:41 PST 2013


The old calculation was off by one in certain cases.  The documentation
contains a precise formula for how to calculate it, so just use that.

Fixes random corruption in Steam's Big Picture Mode, random corruption
in PlaneShift (since the varying reordering code landed), and Piglit
test ________.

NOTE: This is a candidate for all stable branches.

(The code needs to get cleaned up - the final result is quite ugly - but
 I wanted to put it on the list for the people working on these bugs.)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56920
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=60172
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/gen6_sf_state.c | 52 +++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 17 deletions(-)

I don't expect this to go upstream as is; it should get cleaned up first.

We also should do the same thing for Ivybridge.  Haven't tried that yet.

diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index c1bc252..83ff79c 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -113,7 +113,6 @@ upload_sf_state(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &intel->ctx;
-   uint32_t urb_entry_read_length;
    /* BRW_NEW_FRAGMENT_PROGRAM */
    uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead);
    /* _NEW_LIGHT */
@@ -125,26 +124,15 @@ upload_sf_state(struct brw_context *brw)
    bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
 
    int attr = 0, input_index = 0;
+   /* Skip over the first two entries in the VUE map (position and point size),
+    * as they're passed through anyway and reading them is just overhead.
+    */
    int urb_entry_read_offset = 1;
    float point_size;
    uint16_t attr_overrides[FRAG_ATTRIB_MAX];
    uint32_t point_sprite_origin;
 
-   /* CACHE_NEW_VS_PROG */
-   urb_entry_read_length = ((brw->vs.prog_data->vue_map.num_slots + 1) / 2 -
-			    urb_entry_read_offset);
-   if (urb_entry_read_length == 0) {
-      /* Setting the URB entry read length to 0 causes undefined behavior, so
-       * if we have no URB data to read, set it to 1.
-       */
-      urb_entry_read_length = 1;
-   }
-
-   dw1 =
-      GEN6_SF_SWIZZLE_ENABLE |
-      num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
-      urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
-      urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+   dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT;
 
    dw2 = GEN6_SF_STATISTICS_ENABLE |
          GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
@@ -280,6 +268,7 @@ upload_sf_state(struct brw_context *brw)
    /* Create the mapping from the FS inputs we produce to the VS outputs
     * they source from.
     */
+   uint32_t max_source_attr = 0;
    for (; attr < FRAG_ATTRIB_MAX; attr++) {
       enum glsl_interp_qualifier interp_qualifier =
          brw->fragment_program->InterpQualifier[attr];
@@ -312,12 +301,41 @@ upload_sf_state(struct brw_context *brw)
       assert(input_index < 16 || attr == input_index);
 
       /* CACHE_NEW_VS_PROG | _NEW_LIGHT | _NEW_PROGRAM */
-      attr_overrides[input_index++] =
+      attr_overrides[input_index] =
          get_attr_override(&brw->vs.prog_data->vue_map,
 			   urb_entry_read_offset, attr,
                            ctx->VertexProgram._TwoSideEnabled);
+      bool swizzling = (attr_overrides[input_index] & 0xc0) != 0;
+      uint32_t source_attr =
+         (attr_overrides[input_index] & 0x1f) + (swizzling ? 1 : 0);
+
+      if (source_attr > max_source_attr)
+         max_source_attr = source_attr;
+
+      ++input_index;
    }
 
+   /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
+    * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
+    *
+    * "This field should be set to the minimum length required to read the
+    *  maximum source attribute.  The maximum source attribute is indicated
+    *  by the maximum value of the enabled Attribute # Source Attribute if
+    *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
+    *  enable is not set.
+    *  read_length = ceiling((max_source_attr + 1) / 2)
+    *
+    *  [errata] Corruption/Hang possible if length programmed larger than
+    *  recommended"
+    *
+    * We use Attribute Swizzle Enable, so max_source_attr is bits 4:0 of the
+    * last element of attr_overrides set above.  Using ALIGN(x, 2) guarantees
+    * that the division will produce ceiling() rather than truncation.
+    */
+   uint32_t urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
+   dw1 |= urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+          urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+
    for (; input_index < FRAG_ATTRIB_MAX; input_index++)
       attr_overrides[input_index] = 0;
 
-- 
1.8.1.2



More information about the mesa-dev mailing list