[Mesa-dev] [PATCH 18/20] mesa: Rewrite the way uniforms are tracked and handled

Wed Jan 4 03:28:12 PST 2012

On 01/03/2012 09:43 PM, Marek Olšák wrote:
> On Fri, Oct 28, 2011 at 7:42 PM, Ian Romanick <idr at freedesktop.org> wrote:
>> diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
>> index db2f200..50a724b 100644
>> --- a/src/mesa/main/uniform_query.cpp
>> +++ b/src/mesa/main/uniform_query.cpp
>> @@ -22,15 +22,16 @@
>>  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
>>  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>>  */
>> +#include <stdlib.h>
>>  #include "main/core.h"
>>  #include "main/context.h"
>>  #include "ir.h"
>>  #include "ir_uniform.h"
>> +#include "program/hash_table.h"
>>  #include "../glsl/program.h"
>>  #include "../glsl/ir_uniform.h"
>>
>>  extern "C" {
>> -#include "main/image.h"
>>  #include "main/shaderapi.h"
>>  #include "main/shaderobj.h"
>>  #include "uniforms.h"
>> @@ -44,42 +45,30 @@ _mesa_GetActiveUniformARB(GLhandleARB program, GLuint index,
>>    GET_CURRENT_CONTEXT(ctx);
>>    struct gl_shader_program *shProg =
>>       _mesa_lookup_shader_program_err(ctx, program, "glGetActiveUniform");
>> -   const struct gl_program_parameter *param;
>>
>>    if (!shProg)
>>       return;
>>
>> -   if (!shProg->Uniforms || index >= shProg->Uniforms->NumUniforms) {
>> +   if (index >= shProg->NumUserUniformStorage) {
>>       _mesa_error(ctx, GL_INVALID_VALUE, "glGetActiveUniform(index)");
>>       return;
>>    }
>>
>> -   param = get_uniform_parameter(shProg, index);
>> -   if (!param)
>> -      return;
>> -
>> -   const struct gl_uniform *const uni = &shProg->Uniforms->Uniforms[index];
>> +   const struct gl_uniform_storage *const uni = &shProg->UniformStorage[index];
>>
>>    if (nameOut) {
>> -      _mesa_copy_string(nameOut, maxLength, length, param->Name);
>> +      _mesa_copy_string(nameOut, maxLength, length, uni->name);
>>    }
>>
>>    if (size) {
>> -      GLint typeSize = _mesa_sizeof_glsl_type(uni->Type->gl_type);
>> -      if ((GLint) param->Size > typeSize) {
>> -         /* This is an array.
>> -          * Array elements are placed on vector[4] boundaries so they're
>> -          * a multiple of four floats.  We round typeSize up to next multiple
>> -          * of four to get the right size below.
>> -          */
>> -         typeSize = (typeSize + 3) & ~3;
>> -      }
>> -      /* Note that the returned size is in units of the <type>, not bytes */
>> -      *size = param->Size / typeSize;
>> +      /* array_elements is zero for non-arrays, but the API requires that 1 be
>> +       * returned.
>> +       */
>> +      *size = MAX2(1, uni->array_elements);
>>    }
>>
>>    if (type) {
>> -      *type = uni->Type->gl_type;
>> +      *type = uni->type->gl_type;
>>    }
>>  }
>>
>> @@ -409,12 +398,21 @@ validate_uniform_parameters(struct gl_context *ctx,
>>
>>    _mesa_uniform_split_location_offset(location, loc, array_index);
>>
>> -   if (*loc >= shProg->Uniforms->NumUniforms) {
>> +   if (*loc >= shProg->NumUserUniformStorage) {
>>       _mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)",
>>                  caller, location);
>>       return false;
>>    }
>>
>> +   /* This case should be impossible.  The implication is that a call like
>> +    * glGetUniformLocation(prog, "foo[8]") was successful but "foo" is not an
>> +    * array.
>> +    */
>> +   if (*array_index != 0 && shProg->UniformStorage[*loc].array_elements == 0) {
>> +      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)",
>> +                 caller, location);
>> +      return false;
>> +   }
>>    return true;
>>  }
>>
>> @@ -423,72 +421,81 @@ validate_uniform_parameters(struct gl_context *ctx,
>>  */
>>  extern "C" void
>>  _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
>> -                 GLsizei bufSize, GLenum returnType, GLvoid *paramsOut)
>> +                 GLsizei bufSize, enum glsl_base_type returnType,
>> +                 GLvoid *paramsOut)
>>  {
>>    struct gl_shader_program *shProg =
>>       _mesa_lookup_shader_program_err(ctx, program, "glGetUniformfv");
>> -   struct gl_program *prog;
>> -   GLint paramPos;
>> +   struct gl_uniform_storage *uni;
>>    unsigned loc, offset;
>>
>>    if (!validate_uniform_parameters(ctx, shProg, location, 1,
>>                                    &loc, &offset, "glGetUniform", true))
>>       return;
>>
>> -   if (!find_uniform_parameter_pos(shProg, loc, &prog, &paramPos)) {
>> -      _mesa_error(ctx, GL_INVALID_OPERATION,  "glGetUniformfv(location)");
>> -   }
>> -   else {
>> -      const struct gl_program_parameter *p =
>> -         &prog->Parameters->Parameters[paramPos];
>> -      gl_constant_value (*values)[4];
>> -      GLint rows, cols, i, j, k;
>> -      GLsizei numBytes;
>> -      GLenum storage_type;
>> -
>> -      values = prog->Parameters->ParameterValues + paramPos + offset;
>> -
>> -      get_uniform_rows_cols(p, &rows, &cols);
>> -
>> -      numBytes = rows * cols * _mesa_sizeof_type(returnType);
>> -      if (bufSize < numBytes) {
>> -         _mesa_error( ctx, GL_INVALID_OPERATION,
>> -                     "glGetnUniformfvARB(out of bounds: bufSize is %d,"
>> -                     " but %d bytes are required)", bufSize, numBytes );
>> -         return;
>> -      }
>> +   uni = &shProg->UniformStorage[loc];
>>
>> -      if (ctx->Const.NativeIntegers) {
>> -        storage_type = base_uniform_type(p->DataType);
>> -      } else {
>> -        storage_type = GL_FLOAT;
>> -      }
>> +   {
>> +      unsigned elements = (uni->type->is_sampler())
>> +        ? 1 : uni->type->components();
>>
>> -      k = 0;
>> -      for (i = 0; i < rows; i++) {
>> -        for (j = 0; j < cols; j++ ) {
>> -           void *out = (char *)paramsOut + 4 * k;
>> +      /* Calculate the source base address *BEFORE* modifying elements to
>> +       * account for the size of the user's buffer.
>> +       */
>> +      const union gl_constant_value *const src =
>> +        &uni->storage[offset * elements];
>> +
>> +      unsigned bytes = sizeof(uni->storage[0]) * elements;
>> +      if (bytes > bufSize) {
>> +        elements = bufSize / sizeof(uni->storage[0]);
>> +        bytes = bufSize;
>> +      }
>>
>> +      /* If the return type and the uniform's native type are "compatible,"
>> +       * just memcpy the data.  If the types are not compatible, perform a
>> +       * slower convert-and-copy process.
>> +       */
>> +      if (returnType == uni->type->base_type
>> +         || ((returnType == GLSL_TYPE_INT
>> +              || returnType == GLSL_TYPE_UINT
>> +              || returnType == GLSL_TYPE_SAMPLER)
>> +             &&
>> +             (uni->type->base_type == GLSL_TYPE_INT
>> +              || uni->type->base_type == GLSL_TYPE_UINT
>> +              || uni->type->base_type == GLSL_TYPE_SAMPLER))) {
>> +        memcpy(paramsOut, src, bytes);
>> +      } else {
>> +        union gl_constant_value *const dst =
>> +           (union gl_constant_value *) paramsOut;
>> +
>> +        /* This code could be optimized by putting the loop inside the switch
>> +         * statements.  However, this is not expected to be
>> +         * performance-critical code.
>> +         */
>> +        for (unsigned i = 0; i < elements; i++) {
>>            switch (returnType) {
>> -           case GL_FLOAT:
>> -              switch (storage_type) {
>> -              case GL_FLOAT:
>> -                 *(float *)out = values[i][j].f;
>> +           case GLSL_TYPE_FLOAT:
>> +              switch (uni->type->base_type) {
>> +              case GLSL_TYPE_UINT:
>> +                 dst[i].f = (float) src[i].u;
>> +                 break;
>> +              case GLSL_TYPE_INT:
>> +              case GLSL_TYPE_SAMPLER:
>> +                 dst[i].f = (float) src[i].i;
>>                  break;
>> -              case GL_INT:
>> -              case GL_BOOL: /* boolean is just an integer 1 or 0. */
>> -                 *(float *)out = values[i][j].i;
>> +              case GLSL_TYPE_BOOL:
>> +                 dst[i].f = src[i].i ? 1.0f : 0.0f;
>>                  break;
>> -              case GL_UNSIGNED_INT:
>> -                 *(float *)out = values[i][j].u;
>> +              default:
>> +                 assert(!"Should not get here.");
>>                  break;
>>               }
>>               break;
>>
>> -           case GL_INT:
>> -           case GL_UNSIGNED_INT:
>> -              switch (storage_type) {
>> -              case GL_FLOAT:
>> +           case GLSL_TYPE_INT:
>> +           case GLSL_TYPE_UINT:
>> +              switch (uni->type->base_type) {
>> +              case GLSL_TYPE_FLOAT:
>>                  /* While the GL 3.2 core spec doesn't explicitly
>>                   * state how conversion of float uniforms to integer
>>                   * values works, in section 6.2 "State Tables" on
>> @@ -506,23 +513,21 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
>>                   *      a floating-point value is rounded to the
>>                   *      nearest integer..."
>>                   */
>> -                 *(int *)out = IROUND(values[i][j].f);
>> +                 dst[i].i = IROUND(src[i].f);
>>                  break;
>> -
>> -              case GL_INT:
>> -              case GL_UNSIGNED_INT:
>> -              case GL_BOOL:
>> -                 /* type conversions for these to int/uint are just
>> -                  * copying the data.
>> -                  */
>> -                 *(int *)out = values[i][j].i;
>> +              case GLSL_TYPE_BOOL:
>> +                 dst[i].i = src[i].i ? 1 : 0;
>>                  break;
>> +              default:
>> +                 assert(!"Should not get here.");
>>                  break;
>>               }
>>               break;
>> -           }
>>
>> -           k++;
>> +           default:
>> +              assert(!"Should not get here.");
>> +              break;
>> +           }
>>         }
>>       }
>>    }
>> @@ -925,9 +930,12 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg,
>>              GLint location, GLsizei count,
>>               const GLvoid *values, GLenum type)
>>  {
>> -   struct gl_uniform *uniform;
>> -   GLint elems;
>>    unsigned loc, offset;
>> +   unsigned components;
>> +   unsigned src_components;
>> +   unsigned i;
>> +   enum glsl_base_type basicType;
>> +   struct gl_uniform_storage *uni;
>>
>>    ASSERT_OUTSIDE_BEGIN_END(ctx);
>>
>> @@ -935,73 +943,207 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg,
>>                                    &loc, &offset, "glUniform", false))
>>       return;
>>
>> -   elems = _mesa_sizeof_glsl_type(type);
>> +   uni = &shProg->UniformStorage[loc];
>>
>> -   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
>> +   /* Verify that the types are compatible.
>> +    */
>> +   switch (type) {
>> +   case GL_FLOAT:
>> +      basicType = GLSL_TYPE_FLOAT;
>> +      src_components = 1;
>> +      break;
>> +   case GL_FLOAT_VEC2:
>> +      basicType = GLSL_TYPE_FLOAT;
>> +      src_components = 2;
>> +      break;
>> +   case GL_FLOAT_VEC3:
>> +      basicType = GLSL_TYPE_FLOAT;
>> +      src_components = 3;
>> +      break;
>> +   case GL_FLOAT_VEC4:
>> +      basicType = GLSL_TYPE_FLOAT;
>> +      src_components = 4;
>> +      break;
>> +   case GL_UNSIGNED_INT:
>> +      basicType = GLSL_TYPE_UINT;
>> +      src_components = 1;
>> +      break;
>> +   case GL_UNSIGNED_INT_VEC2:
>> +      basicType = GLSL_TYPE_UINT;
>> +      src_components = 2;
>> +      break;
>> +   case GL_UNSIGNED_INT_VEC3:
>> +      basicType = GLSL_TYPE_UINT;
>> +      src_components = 3;
>> +      break;
>> +   case GL_UNSIGNED_INT_VEC4:
>> +      basicType = GLSL_TYPE_UINT;
>> +      src_components = 4;
>> +      break;
>> +   case GL_INT:
>> +      basicType = GLSL_TYPE_INT;
>> +      src_components = 1;
>> +      break;
>> +   case GL_INT_VEC2:
>> +      basicType = GLSL_TYPE_INT;
>> +      src_components = 2;
>> +      break;
>> +   case GL_INT_VEC3:
>> +      basicType = GLSL_TYPE_INT;
>> +      src_components = 3;
>> +      break;
>> +   case GL_INT_VEC4:
>> +      basicType = GLSL_TYPE_INT;
>> +      src_components = 4;
>> +      break;
>> +   case GL_BOOL:
>> +   case GL_BOOL_VEC2:
>> +   case GL_BOOL_VEC3:
>> +   case GL_BOOL_VEC4:
>> +   case GL_FLOAT_MAT2:
>> +   case GL_FLOAT_MAT2x3:
>> +   case GL_FLOAT_MAT2x4:
>> +   case GL_FLOAT_MAT3x2:
>> +   case GL_FLOAT_MAT3:
>> +   case GL_FLOAT_MAT3x4:
>> +   case GL_FLOAT_MAT4x2:
>> +   case GL_FLOAT_MAT4x3:
>> +   case GL_FLOAT_MAT4:
>> +   default:
>> +      _mesa_problem(NULL, "Invalid type in %s", __func__);
>> +      return;
>> +   }
>>
>> -   uniform = &shProg->Uniforms->Uniforms[loc];
>> +   if (uni->type->is_sampler()) {
>> +      components = 1;
>> +   } else {
>> +      components = uni->type->vector_elements;
>> +   }
>> +
>> +   bool match;
>> +   switch (uni->type->base_type) {
>> +   case GLSL_TYPE_BOOL:
>> +      match = true;
>> +      break;
>> +   case GLSL_TYPE_SAMPLER:
>> +      match = (basicType == GLSL_TYPE_INT);
>> +      break;
>> +   default:
>> +      match = (basicType == uni->type->base_type);
>> +      break;
>> +   }
>> +
>> +   if (uni->type->is_matrix() || components != src_components || !match) {
>> +      _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(type mismatch)");
>> +      return;
>> +   }
>>
>>    if (ctx->Shader.Flags & GLSL_UNIFORMS) {
>> -      const GLenum basicType = base_uniform_type(type);
>> -      GLint i;
>> -      printf("Mesa: set program %u uniform %s (loc %d) to: ",
>> -            shProg->Name, uniform->Name, location);
>> -      if (basicType == GL_INT) {
>> -         const GLint *v = (const GLint *) values;
>> -         for (i = 0; i < count * elems; i++) {
>> -            printf("%d ", v[i]);
>> -         }
>> -      }
>> -      else if (basicType == GL_UNSIGNED_INT) {
>> -         const GLuint *v = (const GLuint *) values;
>> -         for (i = 0; i < count * elems; i++) {
>> -            printf("%u ", v[i]);
>> -         }
>> -      }
>> -      else {
>> -         const GLfloat *v = (const GLfloat *) values;
>> -         assert(basicType == GL_FLOAT);
>> -         for (i = 0; i < count * elems; i++) {
>> -            printf("%g ", v[i]);
>> +      log_uniform(values, basicType, components, 1, count,
>> +                 false, shProg, location, uni);
>> +   }
>> +
>> +   /* Validate the texture unit setting.
>> +    */
>> +   /* FINISHME: I cannot find any justification for this in the GL spec.
>> +    */
>> +   if (uni->type->is_sampler()) {
>> +      for (i = 0; i < count; i++) {
>> +        const unsigned texUnit = ((unsigned *) values)[i];
>> +
>> +         /* check that the sampler (tex unit index) is legal */
>> +         if (texUnit >= ctx->Const.MaxCombinedTextureImageUnits) {
>> +            _mesa_error(ctx, GL_INVALID_VALUE,
>> +                        "glUniform1i(invalid sampler/tex unit index for "
>> +                       "uniform %d)",
>> +                        location);
>> +            return;
>>          }
>>       }
>> -      printf("\n");
>>    }
>>
>> -   /* A uniform var may be used by both a vertex shader and a fragment
>> -    * shader.  We may need to update one or both shader's uniform here:
>> +   /* Page 82 (page 96 of the PDF) of the OpenGL 2.1 spec says:
>> +    *
>> +    *     "When loading N elements starting at an arbitrary position k in a
>> +    *     uniform declared as an array, elements k through k + N - 1 in the
>> +    *     array will be replaced with the new values. Values for any array
>> +    *     element that exceeds the highest array element index used, as
>> +    *     reported by GetActiveUniform, will be ignored by the GL."
>> +    *
>> +    * Clamp 'count' to a valid value.  Note that for non-arrays a count > 1
>> +    * will have already generated an error.
>>     */
>> -   if (shProg->_LinkedShaders[MESA_SHADER_VERTEX]) {
>> -      /* convert uniform location to program parameter index */
>> -      GLint index = uniform->VertPos;
>> -      if (index >= 0) {
>> -         set_program_uniform(ctx,
>> -                             shProg->_LinkedShaders[MESA_SHADER_VERTEX]->Program,
>> -                             index, offset, type, count, elems, values);
>> -      }
>> +   if (uni->array_elements != 0) {
>> +      if (offset >= uni->array_elements)
>> +        return;
>> +
>> +      count = MIN2(count, (uni->array_elements - offset));
>>    }
>>
>> -   if (shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
>> -      /* convert uniform location to program parameter index */
>> -      GLint index = uniform->FragPos;
>> -      if (index >= 0) {
>> -         set_program_uniform(ctx,
>> -                            shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program,
>> -                             index, offset, type, count, elems, values);
>> +   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
>> +
>> +   /* Store the data in the "actual type" backing storage for the uniform.
>> +    */
>> +   if (!uni->type->is_boolean()) {
>> +      memcpy(&uni->storage[components * offset], values,
>> +            sizeof(uni->storage[0]) * components * count);
>> +   } else {
>> +      const union gl_constant_value *src =
>> +        (const union gl_constant_value *) values;
>> +      union gl_constant_value *dst = &uni->storage[components * offset];
>> +      const unsigned elems = components * count;
>> +
>> +      for (i = 0; i < elems; i++) {
>> +        if (basicType == GLSL_TYPE_FLOAT) {
>> +           dst[i].i = src[i].f != 0.0f ? 1 : 0;
>> +        } else {
>> +           dst[i].i = src[i].i != 0    ? 1 : 0;
>> +        }
>>       }
>>    }
>>
>> -   if (shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]) {
>> -      /* convert uniform location to program parameter index */
>> -      GLint index = uniform->GeomPos;
>> -      if (index >= 0) {
>> -         set_program_uniform(ctx,
>> -                            shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program,
>> -                             index, offset, type, count, elems, values);
>> +   uni->initialized = true;
>> +   uni->dirty = true;
>> +
>> +   _mesa_propagate_uniforms_to_driver_storage(uni, offset, count);
>> +
>> +   /* If the uniform is a sampler, do the extra magic necessary to propagate
>> +    * the changes through.
>> +    */
>> +   if (uni->type->is_sampler()) {
>> +      for (i = 0; i < count; i++) {
>> +        shProg->SamplerUnits[uni->sampler + offset + i] =
>> +           ((unsigned *) values)[i];
>>       }
>> -   }
>>
>> -   uniform->Initialized = GL_TRUE;
>> +      bool flushed = false;
>> +      for (i = 0; i < MESA_SHADER_TYPES; i++) {
>> +        struct gl_program *prog;
>> +
>> +        if (shProg->_LinkedShaders[i] == NULL)
>> +           continue;
>> +
>> +        prog = shProg->_LinkedShaders[i]->Program;
>> +
>> +        assert(sizeof(prog->SamplerUnits) == sizeof(shProg->SamplerUnits));
>> +
>> +        if (memcmp(prog->SamplerUnits,
>> +                   shProg->SamplerUnits,
>> +                   sizeof(shProg->SamplerUnits)) != 0) {
>> +           if (!flushed) {
>> +              FLUSH_VERTICES(ctx, _NEW_TEXTURE | _NEW_PROGRAM);
>> +              flushed = true;
>> +           }
>> +
>> +           memcpy(prog->SamplerUnits,
>> +                  shProg->SamplerUnits,
>> +                  sizeof(shProg->SamplerUnits));
>> +
>> +           _mesa_update_shader_textures_used(prog);
>> +           (void) ctx->Driver.ProgramStringNotify(ctx, prog->Target, prog);
> 
> Hi Ian,
> 
> This ProgramStringNotify call causes excessive shader recompilations
> in the game Cogs (from some Humble Bundle). Approximately 25% of
> in-game CPU time is spent on translating shaders and register
> allocation. If the shaders were larger, it would be much worse.
> Moreover, I have realized calling ProgramStringNotify is not needed
> for st/mesa in this case at all (I just comment it out if I want
> higher frame rates), because it doesn't use the prog->SamplerUnits
> array when translating shaders. The array is only used when we set
> samplers, so setting _NEW_TEXTURE is sufficient. It seems that i965
> could do the same thing. Another option would be to cache precompiled
> shaders, but that seems more complicated than doing what st/mesa does.
> 
> Marek

Yeah, I noticed that a while back too, but we never arrived at a
solution and it slipped off my radar.  I agree completely,
ProgramStringNotify is completely overkill and bogus.

We actually already rely on _NEW_TEXTURE for all shader programs.  Since
the uniform code already flags _NEW_TEXTURE, we should be able to just
add the SamplerUnits array to the program key (specifically,
brw_sampler_prog_key_data), and that should take care of any state
dependent recompiles.  Then we wouldn't need ProgramStringNotify at all.

Right?