[Mesa-dev] [PATCH] i965: Fix Vertex URB Read Length calculation in 3DSTATE_SF on Gen6.

Jordan Justen jljusten at gmail.com
Sat Feb 2 11:30:40 PST 2013


On Sat, Feb 2, 2013 at 3:44 AM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> The old calculation was off by one in certain cases.  The documentation
> contains a precise formula for how to calculate it, so just use that.
>
> Fixes random corruption in Steam's Big Picture Mode, random corruption
> in PlaneShift (since the varying reordering code landed), and Piglit
> test ________.

Tested-by: Jordan Justen <jordan.l.justen at intel.com>
with my fbo-5-varyings test.

> NOTE: This is a candidate for all stable branches.
>
> (The code needs to get cleaned up - the final result is quite ugly - but
>  I wanted to put it on the list for the people working on these bugs.)
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56920
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=60172
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  src/mesa/drivers/dri/i965/gen6_sf_state.c | 52 +++++++++++++++++++++----------
>  1 file changed, 35 insertions(+), 17 deletions(-)
>
> I don't expect this to go upstream as is; it should get cleaned up first.
>
> We also should do the same thing for Ivybridge.  Haven't tried that yet.

I haven't been able to reproduce on Ivy Bridge yet.

-Jordan

> diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
> index c1bc252..83ff79c 100644
> --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
> @@ -113,7 +113,6 @@ upload_sf_state(struct brw_context *brw)
>  {
>     struct intel_context *intel = &brw->intel;
>     struct gl_context *ctx = &intel->ctx;
> -   uint32_t urb_entry_read_length;
>     /* BRW_NEW_FRAGMENT_PROGRAM */
>     uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead);
>     /* _NEW_LIGHT */
> @@ -125,26 +124,15 @@ upload_sf_state(struct brw_context *brw)
>     bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
>
>     int attr = 0, input_index = 0;
> +   /* Skip over the first two entries in the VUE map (position and point size),
> +    * as they're passed through anyway and reading them is just overhead.
> +    */
>     int urb_entry_read_offset = 1;
>     float point_size;
>     uint16_t attr_overrides[FRAG_ATTRIB_MAX];
>     uint32_t point_sprite_origin;
>
> -   /* CACHE_NEW_VS_PROG */
> -   urb_entry_read_length = ((brw->vs.prog_data->vue_map.num_slots + 1) / 2 -
> -                           urb_entry_read_offset);
> -   if (urb_entry_read_length == 0) {
> -      /* Setting the URB entry read length to 0 causes undefined behavior, so
> -       * if we have no URB data to read, set it to 1.
> -       */
> -      urb_entry_read_length = 1;
> -   }
> -
> -   dw1 =
> -      GEN6_SF_SWIZZLE_ENABLE |
> -      num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
> -      urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
> -      urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
> +   dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT;
>
>     dw2 = GEN6_SF_STATISTICS_ENABLE |
>           GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
> @@ -280,6 +268,7 @@ upload_sf_state(struct brw_context *brw)
>     /* Create the mapping from the FS inputs we produce to the VS outputs
>      * they source from.
>      */
> +   uint32_t max_source_attr = 0;
>     for (; attr < FRAG_ATTRIB_MAX; attr++) {
>        enum glsl_interp_qualifier interp_qualifier =
>           brw->fragment_program->InterpQualifier[attr];
> @@ -312,12 +301,41 @@ upload_sf_state(struct brw_context *brw)
>        assert(input_index < 16 || attr == input_index);
>
>        /* CACHE_NEW_VS_PROG | _NEW_LIGHT | _NEW_PROGRAM */
> -      attr_overrides[input_index++] =
> +      attr_overrides[input_index] =
>           get_attr_override(&brw->vs.prog_data->vue_map,
>                            urb_entry_read_offset, attr,
>                             ctx->VertexProgram._TwoSideEnabled);
> +      bool swizzling = (attr_overrides[input_index] & 0xc0) != 0;
> +      uint32_t source_attr =
> +         (attr_overrides[input_index] & 0x1f) + (swizzling ? 1 : 0);
> +
> +      if (source_attr > max_source_attr)
> +         max_source_attr = source_attr;
> +
> +      ++input_index;
>     }
>
> +   /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
> +    * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
> +    *
> +    * "This field should be set to the minimum length required to read the
> +    *  maximum source attribute.  The maximum source attribute is indicated
> +    *  by the maximum value of the enabled Attribute # Source Attribute if
> +    *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
> +    *  enable is not set.
> +    *  read_length = ceiling((max_source_attr + 1) / 2)
> +    *
> +    *  [errata] Corruption/Hang possible if length programmed larger than
> +    *  recommended"
> +    *
> +    * We use Attribute Swizzle Enable, so max_source_attr is bits 4:0 of the
> +    * last element of attr_overrides set above.  Using ALIGN(x, 2) guarantees
> +    * that the division will produce ceiling() rather than truncation.
> +    */
> +   uint32_t urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
> +   dw1 |= urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
> +          urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
> +
>     for (; input_index < FRAG_ATTRIB_MAX; input_index++)
>        attr_overrides[input_index] = 0;
>
> --
> 1.8.1.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list