[Mesa-dev] [PATCH 18/20] mesa: Rewrite the way uniforms are tracked and handled

Tue Jan 3 21:43:05 PST 2012

On Fri, Oct 28, 2011 at 7:42 PM, Ian Romanick <idr at freedesktop.org> wrote:
> diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
> index db2f200..50a724b 100644
> --- a/src/mesa/main/uniform_query.cpp
> +++ b/src/mesa/main/uniform_query.cpp
> @@ -22,15 +22,16 @@
>  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
>  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  */
> +#include <stdlib.h>
>  #include "main/core.h"
>  #include "main/context.h"
>  #include "ir.h"
>  #include "ir_uniform.h"
> +#include "program/hash_table.h"
>  #include "../glsl/program.h"
>  #include "../glsl/ir_uniform.h"
>
>  extern "C" {
> -#include "main/image.h"
>  #include "main/shaderapi.h"
>  #include "main/shaderobj.h"
>  #include "uniforms.h"
> @@ -44,42 +45,30 @@ _mesa_GetActiveUniformARB(GLhandleARB program, GLuint index,
>    GET_CURRENT_CONTEXT(ctx);
>    struct gl_shader_program *shProg =
>       _mesa_lookup_shader_program_err(ctx, program, "glGetActiveUniform");
> -   const struct gl_program_parameter *param;
>
>    if (!shProg)
>       return;
>
> -   if (!shProg->Uniforms || index >= shProg->Uniforms->NumUniforms) {
> +   if (index >= shProg->NumUserUniformStorage) {
>       _mesa_error(ctx, GL_INVALID_VALUE, "glGetActiveUniform(index)");
>       return;
>    }
>
> -   param = get_uniform_parameter(shProg, index);
> -   if (!param)
> -      return;
> -
> -   const struct gl_uniform *const uni = &shProg->Uniforms->Uniforms[index];
> +   const struct gl_uniform_storage *const uni = &shProg->UniformStorage[index];
>
>    if (nameOut) {
> -      _mesa_copy_string(nameOut, maxLength, length, param->Name);
> +      _mesa_copy_string(nameOut, maxLength, length, uni->name);
>    }
>
>    if (size) {
> -      GLint typeSize = _mesa_sizeof_glsl_type(uni->Type->gl_type);
> -      if ((GLint) param->Size > typeSize) {
> -         /* This is an array.
> -          * Array elements are placed on vector[4] boundaries so they're
> -          * a multiple of four floats.  We round typeSize up to next multiple
> -          * of four to get the right size below.
> -          */
> -         typeSize = (typeSize + 3) & ~3;
> -      }
> -      /* Note that the returned size is in units of the <type>, not bytes */
> -      *size = param->Size / typeSize;
> +      /* array_elements is zero for non-arrays, but the API requires that 1 be
> +       * returned.
> +       */
> +      *size = MAX2(1, uni->array_elements);
>    }
>
>    if (type) {
> -      *type = uni->Type->gl_type;
> +      *type = uni->type->gl_type;
>    }
>  }
>
> @@ -409,12 +398,21 @@ validate_uniform_parameters(struct gl_context *ctx,
>
>    _mesa_uniform_split_location_offset(location, loc, array_index);
>
> -   if (*loc >= shProg->Uniforms->NumUniforms) {
> +   if (*loc >= shProg->NumUserUniformStorage) {
>       _mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)",
>                  caller, location);
>       return false;
>    }
>
> +   /* This case should be impossible.  The implication is that a call like
> +    * glGetUniformLocation(prog, "foo[8]") was successful but "foo" is not an
> +    * array.
> +    */
> +   if (*array_index != 0 && shProg->UniformStorage[*loc].array_elements == 0) {
> +      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)",
> +                 caller, location);
> +      return false;
> +   }
>    return true;
>  }
>
> @@ -423,72 +421,81 @@ validate_uniform_parameters(struct gl_context *ctx,
>  */
>  extern "C" void
>  _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
> -                 GLsizei bufSize, GLenum returnType, GLvoid *paramsOut)
> +                 GLsizei bufSize, enum glsl_base_type returnType,
> +                 GLvoid *paramsOut)
>  {
>    struct gl_shader_program *shProg =
>       _mesa_lookup_shader_program_err(ctx, program, "glGetUniformfv");
> -   struct gl_program *prog;
> -   GLint paramPos;
> +   struct gl_uniform_storage *uni;
>    unsigned loc, offset;
>
>    if (!validate_uniform_parameters(ctx, shProg, location, 1,
>                                    &loc, &offset, "glGetUniform", true))
>       return;
>
> -   if (!find_uniform_parameter_pos(shProg, loc, &prog, &paramPos)) {
> -      _mesa_error(ctx, GL_INVALID_OPERATION,  "glGetUniformfv(location)");
> -   }
> -   else {
> -      const struct gl_program_parameter *p =
> -         &prog->Parameters->Parameters[paramPos];
> -      gl_constant_value (*values)[4];
> -      GLint rows, cols, i, j, k;
> -      GLsizei numBytes;
> -      GLenum storage_type;
> -
> -      values = prog->Parameters->ParameterValues + paramPos + offset;
> -
> -      get_uniform_rows_cols(p, &rows, &cols);
> -
> -      numBytes = rows * cols * _mesa_sizeof_type(returnType);
> -      if (bufSize < numBytes) {
> -         _mesa_error( ctx, GL_INVALID_OPERATION,
> -                     "glGetnUniformfvARB(out of bounds: bufSize is %d,"
> -                     " but %d bytes are required)", bufSize, numBytes );
> -         return;
> -      }
> +   uni = &shProg->UniformStorage[loc];
>
> -      if (ctx->Const.NativeIntegers) {
> -        storage_type = base_uniform_type(p->DataType);
> -      } else {
> -        storage_type = GL_FLOAT;
> -      }
> +   {
> +      unsigned elements = (uni->type->is_sampler())
> +        ? 1 : uni->type->components();
>
> -      k = 0;
> -      for (i = 0; i < rows; i++) {
> -        for (j = 0; j < cols; j++ ) {
> -           void *out = (char *)paramsOut + 4 * k;
> +      /* Calculate the source base address *BEFORE* modifying elements to
> +       * account for the size of the user's buffer.
> +       */
> +      const union gl_constant_value *const src =
> +        &uni->storage[offset * elements];
> +
> +      unsigned bytes = sizeof(uni->storage[0]) * elements;
> +      if (bytes > bufSize) {
> +        elements = bufSize / sizeof(uni->storage[0]);
> +        bytes = bufSize;
> +      }
>
> +      /* If the return type and the uniform's native type are "compatible,"
> +       * just memcpy the data.  If the types are not compatible, perform a
> +       * slower convert-and-copy process.
> +       */
> +      if (returnType == uni->type->base_type
> +         || ((returnType == GLSL_TYPE_INT
> +              || returnType == GLSL_TYPE_UINT
> +              || returnType == GLSL_TYPE_SAMPLER)
> +             &&
> +             (uni->type->base_type == GLSL_TYPE_INT
> +              || uni->type->base_type == GLSL_TYPE_UINT
> +              || uni->type->base_type == GLSL_TYPE_SAMPLER))) {
> +        memcpy(paramsOut, src, bytes);
> +      } else {
> +        union gl_constant_value *const dst =
> +           (union gl_constant_value *) paramsOut;
> +
> +        /* This code could be optimized by putting the loop inside the switch
> +         * statements.  However, this is not expected to be
> +         * performance-critical code.
> +         */
> +        for (unsigned i = 0; i < elements; i++) {
>            switch (returnType) {
> -           case GL_FLOAT:
> -              switch (storage_type) {
> -              case GL_FLOAT:
> -                 *(float *)out = values[i][j].f;
> +           case GLSL_TYPE_FLOAT:
> +              switch (uni->type->base_type) {
> +              case GLSL_TYPE_UINT:
> +                 dst[i].f = (float) src[i].u;
> +                 break;
> +              case GLSL_TYPE_INT:
> +              case GLSL_TYPE_SAMPLER:
> +                 dst[i].f = (float) src[i].i;
>                  break;
> -              case GL_INT:
> -              case GL_BOOL: /* boolean is just an integer 1 or 0. */
> -                 *(float *)out = values[i][j].i;
> +              case GLSL_TYPE_BOOL:
> +                 dst[i].f = src[i].i ? 1.0f : 0.0f;
>                  break;
> -              case GL_UNSIGNED_INT:
> -                 *(float *)out = values[i][j].u;
> +              default:
> +                 assert(!"Should not get here.");
>                  break;
>               }
>               break;
>
> -           case GL_INT:
> -           case GL_UNSIGNED_INT:
> -              switch (storage_type) {
> -              case GL_FLOAT:
> +           case GLSL_TYPE_INT:
> +           case GLSL_TYPE_UINT:
> +              switch (uni->type->base_type) {
> +              case GLSL_TYPE_FLOAT:
>                  /* While the GL 3.2 core spec doesn't explicitly
>                   * state how conversion of float uniforms to integer
>                   * values works, in section 6.2 "State Tables" on
> @@ -506,23 +513,21 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
>                   *      a floating-point value is rounded to the
>                   *      nearest integer..."
>                   */
> -                 *(int *)out = IROUND(values[i][j].f);
> +                 dst[i].i = IROUND(src[i].f);
>                  break;
> -
> -              case GL_INT:
> -              case GL_UNSIGNED_INT:
> -              case GL_BOOL:
> -                 /* type conversions for these to int/uint are just
> -                  * copying the data.
> -                  */
> -                 *(int *)out = values[i][j].i;
> +              case GLSL_TYPE_BOOL:
> +                 dst[i].i = src[i].i ? 1 : 0;
>                  break;
> +              default:
> +                 assert(!"Should not get here.");
>                  break;
>               }
>               break;
> -           }
>
> -           k++;
> +           default:
> +              assert(!"Should not get here.");
> +              break;
> +           }
>         }
>       }
>    }
> @@ -925,9 +930,12 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg,
>              GLint location, GLsizei count,
>               const GLvoid *values, GLenum type)
>  {
> -   struct gl_uniform *uniform;
> -   GLint elems;
>    unsigned loc, offset;
> +   unsigned components;
> +   unsigned src_components;
> +   unsigned i;
> +   enum glsl_base_type basicType;
> +   struct gl_uniform_storage *uni;
>
>    ASSERT_OUTSIDE_BEGIN_END(ctx);
>
> @@ -935,73 +943,207 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg,
>                                    &loc, &offset, "glUniform", false))
>       return;
>
> -   elems = _mesa_sizeof_glsl_type(type);
> +   uni = &shProg->UniformStorage[loc];
>
> -   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
> +   /* Verify that the types are compatible.
> +    */
> +   switch (type) {
> +   case GL_FLOAT:
> +      basicType = GLSL_TYPE_FLOAT;
> +      src_components = 1;
> +      break;
> +   case GL_FLOAT_VEC2:
> +      basicType = GLSL_TYPE_FLOAT;
> +      src_components = 2;
> +      break;
> +   case GL_FLOAT_VEC3:
> +      basicType = GLSL_TYPE_FLOAT;
> +      src_components = 3;
> +      break;
> +   case GL_FLOAT_VEC4:
> +      basicType = GLSL_TYPE_FLOAT;
> +      src_components = 4;
> +      break;
> +   case GL_UNSIGNED_INT:
> +      basicType = GLSL_TYPE_UINT;
> +      src_components = 1;
> +      break;
> +   case GL_UNSIGNED_INT_VEC2:
> +      basicType = GLSL_TYPE_UINT;
> +      src_components = 2;
> +      break;
> +   case GL_UNSIGNED_INT_VEC3:
> +      basicType = GLSL_TYPE_UINT;
> +      src_components = 3;
> +      break;
> +   case GL_UNSIGNED_INT_VEC4:
> +      basicType = GLSL_TYPE_UINT;
> +      src_components = 4;
> +      break;
> +   case GL_INT:
> +      basicType = GLSL_TYPE_INT;
> +      src_components = 1;
> +      break;
> +   case GL_INT_VEC2:
> +      basicType = GLSL_TYPE_INT;
> +      src_components = 2;
> +      break;
> +   case GL_INT_VEC3:
> +      basicType = GLSL_TYPE_INT;
> +      src_components = 3;
> +      break;
> +   case GL_INT_VEC4:
> +      basicType = GLSL_TYPE_INT;
> +      src_components = 4;
> +      break;
> +   case GL_BOOL:
> +   case GL_BOOL_VEC2:
> +   case GL_BOOL_VEC3:
> +   case GL_BOOL_VEC4:
> +   case GL_FLOAT_MAT2:
> +   case GL_FLOAT_MAT2x3:
> +   case GL_FLOAT_MAT2x4:
> +   case GL_FLOAT_MAT3x2:
> +   case GL_FLOAT_MAT3:
> +   case GL_FLOAT_MAT3x4:
> +   case GL_FLOAT_MAT4x2:
> +   case GL_FLOAT_MAT4x3:
> +   case GL_FLOAT_MAT4:
> +   default:
> +      _mesa_problem(NULL, "Invalid type in %s", __func__);
> +      return;
> +   }
>
> -   uniform = &shProg->Uniforms->Uniforms[loc];
> +   if (uni->type->is_sampler()) {
> +      components = 1;
> +   } else {
> +      components = uni->type->vector_elements;
> +   }
> +
> +   bool match;
> +   switch (uni->type->base_type) {
> +   case GLSL_TYPE_BOOL:
> +      match = true;
> +      break;
> +   case GLSL_TYPE_SAMPLER:
> +      match = (basicType == GLSL_TYPE_INT);
> +      break;
> +   default:
> +      match = (basicType == uni->type->base_type);
> +      break;
> +   }
> +
> +   if (uni->type->is_matrix() || components != src_components || !match) {
> +      _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(type mismatch)");
> +      return;
> +   }
>
>    if (ctx->Shader.Flags & GLSL_UNIFORMS) {
> -      const GLenum basicType = base_uniform_type(type);
> -      GLint i;
> -      printf("Mesa: set program %u uniform %s (loc %d) to: ",
> -            shProg->Name, uniform->Name, location);
> -      if (basicType == GL_INT) {
> -         const GLint *v = (const GLint *) values;
> -         for (i = 0; i < count * elems; i++) {
> -            printf("%d ", v[i]);
> -         }
> -      }
> -      else if (basicType == GL_UNSIGNED_INT) {
> -         const GLuint *v = (const GLuint *) values;
> -         for (i = 0; i < count * elems; i++) {
> -            printf("%u ", v[i]);
> -         }
> -      }
> -      else {
> -         const GLfloat *v = (const GLfloat *) values;
> -         assert(basicType == GL_FLOAT);
> -         for (i = 0; i < count * elems; i++) {
> -            printf("%g ", v[i]);
> +      log_uniform(values, basicType, components, 1, count,
> +                 false, shProg, location, uni);
> +   }
> +
> +   /* Validate the texture unit setting.
> +    */
> +   /* FINISHME: I cannot find any justification for this in the GL spec.
> +    */
> +   if (uni->type->is_sampler()) {
> +      for (i = 0; i < count; i++) {
> +        const unsigned texUnit = ((unsigned *) values)[i];
> +
> +         /* check that the sampler (tex unit index) is legal */
> +         if (texUnit >= ctx->Const.MaxCombinedTextureImageUnits) {
> +            _mesa_error(ctx, GL_INVALID_VALUE,
> +                        "glUniform1i(invalid sampler/tex unit index for "
> +                       "uniform %d)",
> +                        location);
> +            return;
>          }
>       }
> -      printf("\n");
>    }
>
> -   /* A uniform var may be used by both a vertex shader and a fragment
> -    * shader.  We may need to update one or both shader's uniform here:
> +   /* Page 82 (page 96 of the PDF) of the OpenGL 2.1 spec says:
> +    *
> +    *     "When loading N elements starting at an arbitrary position k in a
> +    *     uniform declared as an array, elements k through k + N - 1 in the
> +    *     array will be replaced with the new values. Values for any array
> +    *     element that exceeds the highest array element index used, as
> +    *     reported by GetActiveUniform, will be ignored by the GL."
> +    *
> +    * Clamp 'count' to a valid value.  Note that for non-arrays a count > 1
> +    * will have already generated an error.
>     */
> -   if (shProg->_LinkedShaders[MESA_SHADER_VERTEX]) {
> -      /* convert uniform location to program parameter index */
> -      GLint index = uniform->VertPos;
> -      if (index >= 0) {
> -         set_program_uniform(ctx,
> -                             shProg->_LinkedShaders[MESA_SHADER_VERTEX]->Program,
> -                             index, offset, type, count, elems, values);
> -      }
> +   if (uni->array_elements != 0) {
> +      if (offset >= uni->array_elements)
> +        return;
> +
> +      count = MIN2(count, (uni->array_elements - offset));
>    }
>
> -   if (shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
> -      /* convert uniform location to program parameter index */
> -      GLint index = uniform->FragPos;
> -      if (index >= 0) {
> -         set_program_uniform(ctx,
> -                            shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program,
> -                             index, offset, type, count, elems, values);
> +   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
> +
> +   /* Store the data in the "actual type" backing storage for the uniform.
> +    */
> +   if (!uni->type->is_boolean()) {
> +      memcpy(&uni->storage[components * offset], values,
> +            sizeof(uni->storage[0]) * components * count);
> +   } else {
> +      const union gl_constant_value *src =
> +        (const union gl_constant_value *) values;
> +      union gl_constant_value *dst = &uni->storage[components * offset];
> +      const unsigned elems = components * count;
> +
> +      for (i = 0; i < elems; i++) {
> +        if (basicType == GLSL_TYPE_FLOAT) {
> +           dst[i].i = src[i].f != 0.0f ? 1 : 0;
> +        } else {
> +           dst[i].i = src[i].i != 0    ? 1 : 0;
> +        }
>       }
>    }
>
> -   if (shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]) {
> -      /* convert uniform location to program parameter index */
> -      GLint index = uniform->GeomPos;
> -      if (index >= 0) {
> -         set_program_uniform(ctx,
> -                            shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program,
> -                             index, offset, type, count, elems, values);
> +   uni->initialized = true;
> +   uni->dirty = true;
> +
> +   _mesa_propagate_uniforms_to_driver_storage(uni, offset, count);
> +
> +   /* If the uniform is a sampler, do the extra magic necessary to propagate
> +    * the changes through.
> +    */
> +   if (uni->type->is_sampler()) {
> +      for (i = 0; i < count; i++) {
> +        shProg->SamplerUnits[uni->sampler + offset + i] =
> +           ((unsigned *) values)[i];
>       }
> -   }
>
> -   uniform->Initialized = GL_TRUE;
> +      bool flushed = false;
> +      for (i = 0; i < MESA_SHADER_TYPES; i++) {
> +        struct gl_program *prog;
> +
> +        if (shProg->_LinkedShaders[i] == NULL)
> +           continue;
> +
> +        prog = shProg->_LinkedShaders[i]->Program;
> +
> +        assert(sizeof(prog->SamplerUnits) == sizeof(shProg->SamplerUnits));
> +
> +        if (memcmp(prog->SamplerUnits,
> +                   shProg->SamplerUnits,
> +                   sizeof(shProg->SamplerUnits)) != 0) {
> +           if (!flushed) {
> +              FLUSH_VERTICES(ctx, _NEW_TEXTURE | _NEW_PROGRAM);
> +              flushed = true;
> +           }
> +
> +           memcpy(prog->SamplerUnits,
> +                  shProg->SamplerUnits,
> +                  sizeof(shProg->SamplerUnits));
> +
> +           _mesa_update_shader_textures_used(prog);
> +           (void) ctx->Driver.ProgramStringNotify(ctx, prog->Target, prog);

Hi Ian,

This ProgramStringNotify call causes excessive shader recompilations
in the game Cogs (from some Humble Bundle). Approximately 25% of
in-game CPU time is spent on translating shaders and register
allocation. If the shaders were larger, it would be much worse.
Moreover, I have realized calling ProgramStringNotify is not needed
for st/mesa in this case at all (I just comment it out if I want
higher frame rates), because it doesn't use the prog->SamplerUnits
array when translating shaders. The array is only used when we set
samplers, so setting _NEW_TEXTURE is sufficient. It seems that i965
could do the same thing. Another option would be to cache precompiled
shaders, but that seems more complicated than doing what st/mesa does.

Marek