[Mesa-dev] [PATCH 8/8] st/mesa: add double input support including lowering (v3)

Dave Airlie airlied at gmail.com
Wed Apr 29 18:14:03 PDT 2015


From: Dave Airlie <airlied at redhat.com>

This takes a different approach to previously, we cannot index into the
inputMapping with anything but the mesa attribute index, so we can't use
the just add one to index trick, we need more info to add one to it
after we've mapped the input.

(Fixed copy propgation and cleaned up a little)

v2: drop float64 format check, just attr->Doubles.
merge enable patch.
v3: cleanup code a bit.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/mesa/state_tracker/st_atom_array.c     | 171 ++++++++++++++++++++++-------
 src/mesa/state_tracker/st_extensions.c     |   4 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  23 +++-
 src/mesa/state_tracker/st_program.c        |   5 +
 src/mesa/state_tracker/st_program.h        |   1 +
 5 files changed, 159 insertions(+), 45 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c
index d4fb8b8..0f2720b 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -44,7 +44,6 @@
 
 #include "cso_cache/cso_context.h"
 #include "util/u_math.h"
-
 #include "main/bufferobj.h"
 #include "main/glformats.h"
 
@@ -311,6 +310,17 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
    return PIPE_FORMAT_NONE; /* silence compiler warning */
 }
 
+static const struct gl_client_array *
+get_client_array(const struct st_vertex_program *vp,
+                 const struct gl_client_array **arrays,
+                 int attr)
+{
+   const GLuint mesaAttr = vp->index_to_input[attr];
+   /* st_program uses 0xffffffff to denote a double placeholder attribute */
+   if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
+      return NULL;
+   return arrays[mesaAttr];
+}
 /**
  * Examine the active arrays to determine if we have interleaved
  * vertex arrays all living in one VBO, or all living in user space.
@@ -327,11 +337,16 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
    GLboolean userSpaceBuffer = GL_FALSE;
 
    for (attr = 0; attr < vpv->num_inputs; attr++) {
-      const GLuint mesaAttr = vp->index_to_input[attr];
-      const struct gl_client_array *array = arrays[mesaAttr];
-      const struct gl_buffer_object *bufObj = array->BufferObj;
-      const GLsizei stride = array->StrideB; /* in bytes */
+      const struct gl_client_array *array;
+      const struct gl_buffer_object *bufObj;
+      GLsizei stride;
 
+      array = get_client_array(vp, arrays, attr);
+      if (!array)
+	 continue;
+
+      stride = array->StrideB; /* in bytes */
+      bufObj = array->BufferObj;
       if (attr == 0) {
          /* save info about the first array */
          firstStride = stride;
@@ -358,6 +373,55 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
    return GL_TRUE;
 }
 
+static void init_velement(struct pipe_vertex_element *velement,
+                          int src_offset, int format,
+                          int instance_divisor, int vbo_index)
+{
+   velement->src_offset = src_offset;
+   velement->src_format = format;
+   velement->instance_divisor = instance_divisor;
+   velement->vertex_buffer_index = vbo_index;
+   assert(velement->src_format);
+}
+
+static void init_velement_lowered(struct st_context *st,
+                                  struct pipe_vertex_element *velements,
+                                  int src_offset, int format,
+                                  int instance_divisor, int vbo_index,
+                                  int nr_components, GLboolean doubles,
+                                  GLuint *attr_idx)
+{
+   int idx = *attr_idx;
+   if (doubles) {
+      int lower_format;
+
+      if (nr_components == 1)
+         lower_format = PIPE_FORMAT_R32G32_UINT;
+      else if (nr_components >= 2)
+         lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
+
+      init_velement(&velements[idx], src_offset,
+                    lower_format, instance_divisor, vbo_index);
+      idx++;
+
+      if (nr_components > 2) {
+         if (nr_components == 3)
+            lower_format = PIPE_FORMAT_R32G32_UINT;
+         else if (nr_components >= 4)
+            lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
+
+         init_velement(&velements[idx], src_offset + 4 * sizeof(float),
+                       lower_format, instance_divisor, vbo_index);
+         idx++;
+      }
+   } else {
+      init_velement(&velements[idx], src_offset,
+                    format, instance_divisor, vbo_index);
+      idx++;
+   }
+   *attr_idx = idx;
+}
+
 /**
  * Set up for drawing interleaved arrays that all live in one VBO
  * or all live in user space.
@@ -365,13 +429,15 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
  * \param velements  returns vertex element info
  */
 static boolean
-setup_interleaved_attribs(const struct st_vertex_program *vp,
+setup_interleaved_attribs(struct st_context *st,
+                          const struct st_vertex_program *vp,
                           const struct st_vp_variant *vpv,
                           const struct gl_client_array **arrays,
                           struct pipe_vertex_buffer *vbuffer,
-                          struct pipe_vertex_element velements[])
+                          struct pipe_vertex_element velements[],
+                          unsigned *num_velements)
 {
-   GLuint attr;
+   GLuint attr, attr_idx;
    const GLubyte *low_addr = NULL;
    GLboolean usingVBO;      /* all arrays in a VBO? */
    struct gl_buffer_object *bufobj;
@@ -381,8 +447,10 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
     * Init bufobj and stride.
     */
    if (vpv->num_inputs) {
-      const GLuint mesaAttr0 = vp->index_to_input[0];
-      const struct gl_client_array *array = arrays[mesaAttr0];
+      const struct gl_client_array *array;
+
+      array = get_client_array(vp, arrays, 0);
+      assert(array);
 
       /* Since we're doing interleaved arrays, we know there'll be at most
        * one buffer object and the stride will be the same for all arrays.
@@ -394,7 +462,11 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
       low_addr = arrays[vp->index_to_input[0]]->Ptr;
 
       for (attr = 1; attr < vpv->num_inputs; attr++) {
-         const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr;
+         const GLubyte *start;
+         array = get_client_array(vp, arrays, attr);
+         if (!array)
+            continue;
+         start = array->Ptr;
          low_addr = MIN2(low_addr, start);
       }
    }
@@ -408,25 +480,33 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
    /* are the arrays in user space? */
    usingVBO = _mesa_is_bufferobj(bufobj);
 
+   attr_idx = 0;
    for (attr = 0; attr < vpv->num_inputs; attr++) {
-      const GLuint mesaAttr = vp->index_to_input[attr];
-      const struct gl_client_array *array = arrays[mesaAttr];
-      unsigned src_offset = (unsigned) (array->Ptr - low_addr);
+      const struct gl_client_array *array;
+      unsigned src_offset;
+      unsigned src_format;
+
+      array = get_client_array(vp, arrays, attr);
+      if (!array)
+         continue;
 
+      src_offset = (unsigned) (array->Ptr - low_addr);
       assert(array->_ElementSize ==
              _mesa_bytes_per_vertex_attrib(array->Size, array->Type));
 
-      velements[attr].src_offset = src_offset;
-      velements[attr].instance_divisor = array->InstanceDivisor;
-      velements[attr].vertex_buffer_index = 0;
-      velements[attr].src_format = st_pipe_vertex_format(array->Type,
-                                                         array->Size,
-                                                         array->Format,
-                                                         array->Normalized,
-                                                         array->Integer);
-      assert(velements[attr].src_format);
+      src_format = st_pipe_vertex_format(array->Type,
+                                         array->Size,
+                                         array->Format,
+                                         array->Normalized,
+                                         array->Integer);
+
+      init_velement_lowered(st, velements, src_offset, src_format,
+                            array->InstanceDivisor, 0,
+                            array->Size, array->Doubles, &attr_idx);
    }
 
+   *num_velements = attr_idx;
+
    /*
     * Return the vbuffer info and setup user-space attrib info, if needed.
     */
@@ -472,17 +552,25 @@ setup_non_interleaved_attribs(struct st_context *st,
                               const struct st_vp_variant *vpv,
                               const struct gl_client_array **arrays,
                               struct pipe_vertex_buffer vbuffer[],
-                              struct pipe_vertex_element velements[])
+                              struct pipe_vertex_element velements[],
+                              unsigned *num_velements)
 {
    struct gl_context *ctx = st->ctx;
-   GLuint attr;
+   GLuint attr, attr_idx = 0;
 
    for (attr = 0; attr < vpv->num_inputs; attr++) {
       const GLuint mesaAttr = vp->index_to_input[attr];
-      const struct gl_client_array *array = arrays[mesaAttr];
-      struct gl_buffer_object *bufobj = array->BufferObj;
-      GLsizei stride = array->StrideB;
+      const struct gl_client_array *array;
+      struct gl_buffer_object *bufobj;
+      GLsizei stride;
+      unsigned src_format;
 
+      array = get_client_array(vp, arrays, attr);
+      if (!array)
+         continue;
+
+      stride = array->StrideB;
+      bufobj = array->BufferObj;
       assert(array->_ElementSize ==
              _mesa_bytes_per_vertex_attrib(array->Size, array->Type));
 
@@ -524,16 +612,19 @@ setup_non_interleaved_attribs(struct st_context *st,
       /* common-case setup */
       vbuffer[attr].stride = stride; /* in bytes */
 
-      velements[attr].src_offset = 0;
-      velements[attr].instance_divisor = array->InstanceDivisor;
-      velements[attr].vertex_buffer_index = attr;
-      velements[attr].src_format = st_pipe_vertex_format(array->Type,
-                                                         array->Size,
-                                                         array->Format,
-                                                         array->Normalized,
-                                                         array->Integer);
-      assert(velements[attr].src_format);
+      src_format = st_pipe_vertex_format(array->Type,
+                                         array->Size,
+                                         array->Format,
+                                         array->Normalized,
+                                         array->Integer);
+
+      init_velement_lowered(st, velements, 0, src_format,
+                            array->InstanceDivisor, attr,
+                            array->Size, array->Doubles, &attr_idx);
+
    }
+
+   *num_velements = attr_idx;
    return TRUE;
 }
 
@@ -563,25 +654,23 @@ static void update_array(struct st_context *st)
     * Setup the vbuffer[] and velements[] arrays.
     */
    if (is_interleaved_arrays(vp, vpv, arrays)) {
-      if (!setup_interleaved_attribs(vp, vpv, arrays, vbuffer, velements)) {
+      if (!setup_interleaved_attribs(st, vp, vpv, arrays, vbuffer, velements, &num_velements)) {
          st->vertex_array_out_of_memory = TRUE;
          return;
       }
 
       num_vbuffers = 1;
-      num_velements = vpv->num_inputs;
       if (num_velements == 0)
          num_vbuffers = 0;
    }
    else {
       if (!setup_non_interleaved_attribs(st, vp, vpv, arrays, vbuffer,
-                                         velements)) {
+                                         velements, &num_velements)) {
          st->vertex_array_out_of_memory = TRUE;
          return;
       }
 
       num_vbuffers = vpv->num_inputs;
-      num_velements = vpv->num_inputs;
    }
 
    cso_set_vertex_buffers(st->cso_context, 0, num_vbuffers, vbuffer);
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 82e4a30..b1057f3 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -909,6 +909,8 @@ void st_init_extensions(struct pipe_screen *screen,
    if (screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
                                 PIPE_SHADER_CAP_DOUBLES) &&
        screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
-                                PIPE_SHADER_CAP_DOUBLES))
+                                PIPE_SHADER_CAP_DOUBLES)) {
       extensions->ARB_gpu_shader_fp64 = GL_TRUE;
+      extensions->ARB_vertex_attrib_64bit = GL_TRUE;
+   }
 }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 08957dc..70fbfb1 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -88,6 +88,7 @@ public:
       this->reladdr = NULL;
       this->reladdr2 = NULL;
       this->has_index2 = false;
+      this->double_reg2 = false;
    }
 
    st_src_reg(gl_register_file file, int index, int type)
@@ -101,6 +102,7 @@ public:
       this->reladdr = NULL;
       this->reladdr2 = NULL;
       this->has_index2 = false;
+      this->double_reg2 = false;
    }
 
    st_src_reg(gl_register_file file, int index, int type, int index2D)
@@ -114,6 +116,7 @@ public:
       this->reladdr = NULL;
       this->reladdr2 = NULL;
       this->has_index2 = false;
+      this->double_reg2 = false;
    }
 
    st_src_reg()
@@ -127,6 +130,7 @@ public:
       this->reladdr = NULL;
       this->reladdr2 = NULL;
       this->has_index2 = false;
+      this->double_reg2 = false;
    }
 
    explicit st_src_reg(st_dst_reg reg);
@@ -141,6 +145,7 @@ public:
    st_src_reg *reladdr;
    st_src_reg *reladdr2;
    bool has_index2;
+   bool double_reg2;
 };
 
 class st_dst_reg {
@@ -197,6 +202,7 @@ st_src_reg::st_src_reg(st_dst_reg reg)
    this->index2D = 0;
    this->reladdr2 = NULL;
    this->has_index2 = false;
+   this->double_reg2 = false;
 }
 
 st_dst_reg::st_dst_reg(st_src_reg reg)
@@ -677,8 +683,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
 
             if (dinst->src[j].type == GLSL_TYPE_DOUBLE) {
                dinst->src[j].index = initial_src_idx[j];
-               if (swz > 1)
+               if (swz > 1) {
+                  dinst->src[j].double_reg2 = true;
                   dinst->src[j].index++;
+	       }
 
                if (swz & 1)
                   dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
@@ -3705,6 +3713,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
             } else {
                if (first->src[0].file != copy_chan->src[0].file ||
                    first->src[0].index != copy_chan->src[0].index ||
+                   first->src[0].double_reg2 != copy_chan->src[0].double_reg2 ||
                    first->src[0].index2D != copy_chan->src[0].index2D) {
                   good = false;
                   break;
@@ -3720,6 +3729,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
             inst->src[r].index = first->src[0].index;
             inst->src[r].index2D = first->src[0].index2D;
             inst->src[r].has_index2 = first->src[0].has_index2;
+            inst->src[r].double_reg2 = first->src[0].double_reg2;
 
             int swizzle = 0;
             for (int i = 0; i < 4; i++) {
@@ -4552,6 +4562,9 @@ dst_register(struct st_translate *t,
 static struct ureg_src
 src_register(struct st_translate *t, const st_src_reg *reg)
 {
+   int index = reg->index;
+   int double_reg2 = reg->double_reg2 ? 1 : 0;
+
    switch(reg->file) {
    case PROGRAM_UNDEFINED:
       return ureg_imm4f(t->ureg, 0, 0, 0, 0);
@@ -4577,8 +4590,12 @@ src_register(struct st_translate *t, const st_src_reg *reg)
       return t->immediates[reg->index];
 
    case PROGRAM_INPUT:
-      assert(t->inputMapping[reg->index] < ARRAY_SIZE(t->inputs));
-      return t->inputs[t->inputMapping[reg->index]];
+      /* GLSL inputs are 64-bit containers, so we have to
+       * map back to the original index and add the offset after
+       * mapping. */
+      index -= double_reg2;
+      assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
+      return t->inputs[t->inputMapping[index] + double_reg2];
 
    case PROGRAM_OUTPUT:
       assert(t->outputMapping[reg->index] < ARRAY_SIZE(t->outputs));
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index d93b3c7..a9110d3 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -194,6 +194,11 @@ st_prepare_vertex_program(struct gl_context *ctx,
          stvp->input_to_index[attr] = stvp->num_inputs;
          stvp->index_to_input[stvp->num_inputs] = attr;
          stvp->num_inputs++;
+         if ((stvp->Base.Base.DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) {
+            /* add placeholder for second part of a double attribute */
+            stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
+            stvp->num_inputs++;
+         }
       }
    }
    /* bit of a hack, presetup potentially unused edgeflag input */
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index b2c86fa..a2c5606 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -45,6 +45,7 @@
 extern "C" {
 #endif
 
+#define ST_DOUBLE_ATTRIB_PLACEHOLDER 0xffffffff
 
 /** Fragment program variant key */
 struct st_fp_variant_key
-- 
2.1.0



More information about the mesa-dev mailing list