[Mesa-dev] [PATCH] i965/gen8: Fix vertex attrib upload for dvec3/4 shader inputs

Antia Puentes apuentes at igalia.com
Mon Oct 31 17:22:43 UTC 2016


The emission of vertex attributes corresponding to dvec3 and dvec4
vertex shader input variables was not correct when the <size> passed
to the VertexAttribL* commands was <= 2.

This was because we were using the vertex array size when emitting vertices
to decide if we uploaded a 64-bit floating point attribute as 1 slot (128-bits)
for sizes 1 and 2, or 2 slots (256-bits) for sizes 3 and 4. This caused problems
when mapping the input variables to registers because, for deciding which
registers contain the values uploaded for a certain variable, we use the size
and type given to the variable in the shader, so we will be assigning 256-bits
to dvec3/4 variables, even if we only uploaded 128-bits for them, which happened
when the vertex array size was <= 2.

The patch uses the shader information to only emit as 128-bits those 64-bit floating
point variables that were declared as double or dvec2 in the vertex shader. Dvec3 and
dvec4 variables will be always uploaded as 256-bits, independently of the <size> given
to the VertexAttribL* command.

>From the ARB_vertex_attrib_64bit specification:

   "For the 64-bit double precision types listed in Table X.1, no default
    attribute values are provided if the values of the vertex attribute variable
    are specified with fewer components than required for the attribute
    variable. For example, the fourth component of a variable of type dvec4
    will be undefined if specified using VertexAttribL3dv or using a vertex
    array specified with VertexAttribLPointer and a size of three."

We are filling these unspecified components with zeros, which coincidentally is
also what the GL44-CTS.vertex_attrib_binding.basic-inputL-case1 expects.

Fixes: GL44-CTS.vertex_attrib_binding.basic-inputL-case1 test

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97287
---
 src/mesa/drivers/dri/i965/brw_compiler.h     |  1 +
 src/mesa/drivers/dri/i965/brw_context.h      |  2 +-
 src/mesa/drivers/dri/i965/brw_draw_upload.c  |  4 +++-
 src/mesa/drivers/dri/i965/brw_vs.c           |  1 +
 src/mesa/drivers/dri/i965/gen8_draw_upload.c | 35 ++++++++++++----------------
 5 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 819c7d6..c2400f9 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -641,6 +641,7 @@ struct brw_vs_prog_data {
    struct brw_vue_prog_data base;
 
    GLbitfield64 inputs_read;
+   GLbitfield64 double_inputs_read;
 
    unsigned nr_attributes;
    unsigned nr_attribute_slots;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 308ba99..310372a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -535,7 +535,7 @@ struct brw_vertex_element {
    const struct gl_vertex_array *glarray;
 
    int buffer;
-
+   bool is_dual_slot;
    /** Offset of the first element within the buffer object */
    unsigned int offset;
 };
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index da13e7a..19c8065 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -472,7 +472,9 @@ brw_prepare_vertices(struct brw_context *brw)
    while (vs_inputs) {
       GLuint index = ffsll(vs_inputs) - 1;
       struct brw_vertex_element *input = &brw->vb.inputs[index];
-
+      input->is_dual_slot =
+         (brw->gen >= 8 && _mesa_bitcount_64(vs_prog_data->double_inputs_read &
+                                             BITFIELD64_BIT(index)));
       vs_inputs &= ~BITFIELD64_BIT(index);
       brw->vb.enabled[brw->vb.nr_enabled++] = input;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 842c516..02a88ca 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -151,6 +151,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
    uint64_t outputs_written =
       brw_vs_outputs_written(brw, key, vp->program.info.outputs_written);
    prog_data.inputs_read = vp->program.info.inputs_read;
+   prog_data.double_inputs_read = vp->program.info.double_inputs_read;
 
    if (key->copy_edgeflag) {
       prog_data.inputs_read |= VERT_BIT_EDGEFLAG;
diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
index 23c7587..69ba8e9 100644
--- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
@@ -230,8 +230,15 @@ gen8_emit_vertices(struct brw_context *brw)
       case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
       case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
       case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
-      case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT
-                                              : BRW_VE1_COMPONENT_STORE_1_FLT;
+      case 3:
+         if (input->glarray->Doubles) {
+            comp3 = BRW_VE1_COMPONENT_STORE_0;
+         } else if (input->glarray->Integer) {
+            comp3 = BRW_VE1_COMPONENT_STORE_1_INT;
+         } else {
+            comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+         }
+
          break;
       }
 
@@ -250,24 +257,12 @@ gen8_emit_vertices(struct brw_context *brw)
        *     to be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex
        *     element."
        */
-      if (input->glarray->Doubles) {
-         switch (input->glarray->Size) {
-         case 0:
-         case 1:
-         case 2:
-            /*  Use 128-bits instead of 256-bits to write double and dvec2
-             *  vertex elements.
-             */
-            comp2 = BRW_VE1_COMPONENT_NOSTORE;
-            comp3 = BRW_VE1_COMPONENT_NOSTORE;
-            break;
-         case 3:
-            /* Pad the output using VFCOMP_STORE_0 as suggested
-             * by the BDW PRM.
-             */
-            comp3 = BRW_VE1_COMPONENT_STORE_0;
-            break;
-         }
+      if (input->glarray->Doubles && !input->is_dual_slot) {
+         /* Store vertex elements which correspond to double and dvec2 vertex
+          * shader inputs as 128-bit vertex elements, instead of 256-bits.
+          */
+         comp2 = BRW_VE1_COMPONENT_NOSTORE;
+         comp3 = BRW_VE1_COMPONENT_NOSTORE;
       }
 
       OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
-- 
2.8.1



More information about the mesa-dev mailing list