[Mesa-dev] [PATCH 18/20] mesa: Rewrite the way uniforms are tracked and handled
Kenneth Graunke
kenneth at whitecape.org
Wed Jan 4 03:28:12 PST 2012
On 01/03/2012 09:43 PM, Marek Olšák wrote:
> On Fri, Oct 28, 2011 at 7:42 PM, Ian Romanick <idr at freedesktop.org> wrote:
>> diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
>> index db2f200..50a724b 100644
>> --- a/src/mesa/main/uniform_query.cpp
>> +++ b/src/mesa/main/uniform_query.cpp
>> @@ -22,15 +22,16 @@
>> * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
>> * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>> */
>> +#include <stdlib.h>
>> #include "main/core.h"
>> #include "main/context.h"
>> #include "ir.h"
>> #include "ir_uniform.h"
>> +#include "program/hash_table.h"
>> #include "../glsl/program.h"
>> #include "../glsl/ir_uniform.h"
>>
>> extern "C" {
>> -#include "main/image.h"
>> #include "main/shaderapi.h"
>> #include "main/shaderobj.h"
>> #include "uniforms.h"
>> @@ -44,42 +45,30 @@ _mesa_GetActiveUniformARB(GLhandleARB program, GLuint index,
>> GET_CURRENT_CONTEXT(ctx);
>> struct gl_shader_program *shProg =
>> _mesa_lookup_shader_program_err(ctx, program, "glGetActiveUniform");
>> - const struct gl_program_parameter *param;
>>
>> if (!shProg)
>> return;
>>
>> - if (!shProg->Uniforms || index >= shProg->Uniforms->NumUniforms) {
>> + if (index >= shProg->NumUserUniformStorage) {
>> _mesa_error(ctx, GL_INVALID_VALUE, "glGetActiveUniform(index)");
>> return;
>> }
>>
>> - param = get_uniform_parameter(shProg, index);
>> - if (!param)
>> - return;
>> -
>> - const struct gl_uniform *const uni = &shProg->Uniforms->Uniforms[index];
>> + const struct gl_uniform_storage *const uni = &shProg->UniformStorage[index];
>>
>> if (nameOut) {
>> - _mesa_copy_string(nameOut, maxLength, length, param->Name);
>> + _mesa_copy_string(nameOut, maxLength, length, uni->name);
>> }
>>
>> if (size) {
>> - GLint typeSize = _mesa_sizeof_glsl_type(uni->Type->gl_type);
>> - if ((GLint) param->Size > typeSize) {
>> - /* This is an array.
>> - * Array elements are placed on vector[4] boundaries so they're
>> - * a multiple of four floats. We round typeSize up to next multiple
>> - * of four to get the right size below.
>> - */
>> - typeSize = (typeSize + 3) & ~3;
>> - }
>> - /* Note that the returned size is in units of the <type>, not bytes */
>> - *size = param->Size / typeSize;
>> + /* array_elements is zero for non-arrays, but the API requires that 1 be
>> + * returned.
>> + */
>> + *size = MAX2(1, uni->array_elements);
>> }
>>
>> if (type) {
>> - *type = uni->Type->gl_type;
>> + *type = uni->type->gl_type;
>> }
>> }
>>
>> @@ -409,12 +398,21 @@ validate_uniform_parameters(struct gl_context *ctx,
>>
>> _mesa_uniform_split_location_offset(location, loc, array_index);
>>
>> - if (*loc >= shProg->Uniforms->NumUniforms) {
>> + if (*loc >= shProg->NumUserUniformStorage) {
>> _mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)",
>> caller, location);
>> return false;
>> }
>>
>> + /* This case should be impossible. The implication is that a call like
>> + * glGetUniformLocation(prog, "foo[8]") was successful but "foo" is not an
>> + * array.
>> + */
>> + if (*array_index != 0 && shProg->UniformStorage[*loc].array_elements == 0) {
>> + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)",
>> + caller, location);
>> + return false;
>> + }
>> return true;
>> }
>>
>> @@ -423,72 +421,81 @@ validate_uniform_parameters(struct gl_context *ctx,
>> */
>> extern "C" void
>> _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
>> - GLsizei bufSize, GLenum returnType, GLvoid *paramsOut)
>> + GLsizei bufSize, enum glsl_base_type returnType,
>> + GLvoid *paramsOut)
>> {
>> struct gl_shader_program *shProg =
>> _mesa_lookup_shader_program_err(ctx, program, "glGetUniformfv");
>> - struct gl_program *prog;
>> - GLint paramPos;
>> + struct gl_uniform_storage *uni;
>> unsigned loc, offset;
>>
>> if (!validate_uniform_parameters(ctx, shProg, location, 1,
>> &loc, &offset, "glGetUniform", true))
>> return;
>>
>> - if (!find_uniform_parameter_pos(shProg, loc, &prog, ¶mPos)) {
>> - _mesa_error(ctx, GL_INVALID_OPERATION, "glGetUniformfv(location)");
>> - }
>> - else {
>> - const struct gl_program_parameter *p =
>> - &prog->Parameters->Parameters[paramPos];
>> - gl_constant_value (*values)[4];
>> - GLint rows, cols, i, j, k;
>> - GLsizei numBytes;
>> - GLenum storage_type;
>> -
>> - values = prog->Parameters->ParameterValues + paramPos + offset;
>> -
>> - get_uniform_rows_cols(p, &rows, &cols);
>> -
>> - numBytes = rows * cols * _mesa_sizeof_type(returnType);
>> - if (bufSize < numBytes) {
>> - _mesa_error( ctx, GL_INVALID_OPERATION,
>> - "glGetnUniformfvARB(out of bounds: bufSize is %d,"
>> - " but %d bytes are required)", bufSize, numBytes );
>> - return;
>> - }
>> + uni = &shProg->UniformStorage[loc];
>>
>> - if (ctx->Const.NativeIntegers) {
>> - storage_type = base_uniform_type(p->DataType);
>> - } else {
>> - storage_type = GL_FLOAT;
>> - }
>> + {
>> + unsigned elements = (uni->type->is_sampler())
>> + ? 1 : uni->type->components();
>>
>> - k = 0;
>> - for (i = 0; i < rows; i++) {
>> - for (j = 0; j < cols; j++ ) {
>> - void *out = (char *)paramsOut + 4 * k;
>> + /* Calculate the source base address *BEFORE* modifying elements to
>> + * account for the size of the user's buffer.
>> + */
>> + const union gl_constant_value *const src =
>> + &uni->storage[offset * elements];
>> +
>> + unsigned bytes = sizeof(uni->storage[0]) * elements;
>> + if (bytes > bufSize) {
>> + elements = bufSize / sizeof(uni->storage[0]);
>> + bytes = bufSize;
>> + }
>>
>> + /* If the return type and the uniform's native type are "compatible,"
>> + * just memcpy the data. If the types are not compatible, perform a
>> + * slower convert-and-copy process.
>> + */
>> + if (returnType == uni->type->base_type
>> + || ((returnType == GLSL_TYPE_INT
>> + || returnType == GLSL_TYPE_UINT
>> + || returnType == GLSL_TYPE_SAMPLER)
>> + &&
>> + (uni->type->base_type == GLSL_TYPE_INT
>> + || uni->type->base_type == GLSL_TYPE_UINT
>> + || uni->type->base_type == GLSL_TYPE_SAMPLER))) {
>> + memcpy(paramsOut, src, bytes);
>> + } else {
>> + union gl_constant_value *const dst =
>> + (union gl_constant_value *) paramsOut;
>> +
>> + /* This code could be optimized by putting the loop inside the switch
>> + * statements. However, this is not expected to be
>> + * performance-critical code.
>> + */
>> + for (unsigned i = 0; i < elements; i++) {
>> switch (returnType) {
>> - case GL_FLOAT:
>> - switch (storage_type) {
>> - case GL_FLOAT:
>> - *(float *)out = values[i][j].f;
>> + case GLSL_TYPE_FLOAT:
>> + switch (uni->type->base_type) {
>> + case GLSL_TYPE_UINT:
>> + dst[i].f = (float) src[i].u;
>> + break;
>> + case GLSL_TYPE_INT:
>> + case GLSL_TYPE_SAMPLER:
>> + dst[i].f = (float) src[i].i;
>> break;
>> - case GL_INT:
>> - case GL_BOOL: /* boolean is just an integer 1 or 0. */
>> - *(float *)out = values[i][j].i;
>> + case GLSL_TYPE_BOOL:
>> + dst[i].f = src[i].i ? 1.0f : 0.0f;
>> break;
>> - case GL_UNSIGNED_INT:
>> - *(float *)out = values[i][j].u;
>> + default:
>> + assert(!"Should not get here.");
>> break;
>> }
>> break;
>>
>> - case GL_INT:
>> - case GL_UNSIGNED_INT:
>> - switch (storage_type) {
>> - case GL_FLOAT:
>> + case GLSL_TYPE_INT:
>> + case GLSL_TYPE_UINT:
>> + switch (uni->type->base_type) {
>> + case GLSL_TYPE_FLOAT:
>> /* While the GL 3.2 core spec doesn't explicitly
>> * state how conversion of float uniforms to integer
>> * values works, in section 6.2 "State Tables" on
>> @@ -506,23 +513,21 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
>> * a floating-point value is rounded to the
>> * nearest integer..."
>> */
>> - *(int *)out = IROUND(values[i][j].f);
>> + dst[i].i = IROUND(src[i].f);
>> break;
>> -
>> - case GL_INT:
>> - case GL_UNSIGNED_INT:
>> - case GL_BOOL:
>> - /* type conversions for these to int/uint are just
>> - * copying the data.
>> - */
>> - *(int *)out = values[i][j].i;
>> + case GLSL_TYPE_BOOL:
>> + dst[i].i = src[i].i ? 1 : 0;
>> break;
>> + default:
>> + assert(!"Should not get here.");
>> break;
>> }
>> break;
>> - }
>>
>> - k++;
>> + default:
>> + assert(!"Should not get here.");
>> + break;
>> + }
>> }
>> }
>> }
>> @@ -925,9 +930,12 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg,
>> GLint location, GLsizei count,
>> const GLvoid *values, GLenum type)
>> {
>> - struct gl_uniform *uniform;
>> - GLint elems;
>> unsigned loc, offset;
>> + unsigned components;
>> + unsigned src_components;
>> + unsigned i;
>> + enum glsl_base_type basicType;
>> + struct gl_uniform_storage *uni;
>>
>> ASSERT_OUTSIDE_BEGIN_END(ctx);
>>
>> @@ -935,73 +943,207 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg,
>> &loc, &offset, "glUniform", false))
>> return;
>>
>> - elems = _mesa_sizeof_glsl_type(type);
>> + uni = &shProg->UniformStorage[loc];
>>
>> - FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
>> + /* Verify that the types are compatible.
>> + */
>> + switch (type) {
>> + case GL_FLOAT:
>> + basicType = GLSL_TYPE_FLOAT;
>> + src_components = 1;
>> + break;
>> + case GL_FLOAT_VEC2:
>> + basicType = GLSL_TYPE_FLOAT;
>> + src_components = 2;
>> + break;
>> + case GL_FLOAT_VEC3:
>> + basicType = GLSL_TYPE_FLOAT;
>> + src_components = 3;
>> + break;
>> + case GL_FLOAT_VEC4:
>> + basicType = GLSL_TYPE_FLOAT;
>> + src_components = 4;
>> + break;
>> + case GL_UNSIGNED_INT:
>> + basicType = GLSL_TYPE_UINT;
>> + src_components = 1;
>> + break;
>> + case GL_UNSIGNED_INT_VEC2:
>> + basicType = GLSL_TYPE_UINT;
>> + src_components = 2;
>> + break;
>> + case GL_UNSIGNED_INT_VEC3:
>> + basicType = GLSL_TYPE_UINT;
>> + src_components = 3;
>> + break;
>> + case GL_UNSIGNED_INT_VEC4:
>> + basicType = GLSL_TYPE_UINT;
>> + src_components = 4;
>> + break;
>> + case GL_INT:
>> + basicType = GLSL_TYPE_INT;
>> + src_components = 1;
>> + break;
>> + case GL_INT_VEC2:
>> + basicType = GLSL_TYPE_INT;
>> + src_components = 2;
>> + break;
>> + case GL_INT_VEC3:
>> + basicType = GLSL_TYPE_INT;
>> + src_components = 3;
>> + break;
>> + case GL_INT_VEC4:
>> + basicType = GLSL_TYPE_INT;
>> + src_components = 4;
>> + break;
>> + case GL_BOOL:
>> + case GL_BOOL_VEC2:
>> + case GL_BOOL_VEC3:
>> + case GL_BOOL_VEC4:
>> + case GL_FLOAT_MAT2:
>> + case GL_FLOAT_MAT2x3:
>> + case GL_FLOAT_MAT2x4:
>> + case GL_FLOAT_MAT3x2:
>> + case GL_FLOAT_MAT3:
>> + case GL_FLOAT_MAT3x4:
>> + case GL_FLOAT_MAT4x2:
>> + case GL_FLOAT_MAT4x3:
>> + case GL_FLOAT_MAT4:
>> + default:
>> + _mesa_problem(NULL, "Invalid type in %s", __func__);
>> + return;
>> + }
>>
>> - uniform = &shProg->Uniforms->Uniforms[loc];
>> + if (uni->type->is_sampler()) {
>> + components = 1;
>> + } else {
>> + components = uni->type->vector_elements;
>> + }
>> +
>> + bool match;
>> + switch (uni->type->base_type) {
>> + case GLSL_TYPE_BOOL:
>> + match = true;
>> + break;
>> + case GLSL_TYPE_SAMPLER:
>> + match = (basicType == GLSL_TYPE_INT);
>> + break;
>> + default:
>> + match = (basicType == uni->type->base_type);
>> + break;
>> + }
>> +
>> + if (uni->type->is_matrix() || components != src_components || !match) {
>> + _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(type mismatch)");
>> + return;
>> + }
>>
>> if (ctx->Shader.Flags & GLSL_UNIFORMS) {
>> - const GLenum basicType = base_uniform_type(type);
>> - GLint i;
>> - printf("Mesa: set program %u uniform %s (loc %d) to: ",
>> - shProg->Name, uniform->Name, location);
>> - if (basicType == GL_INT) {
>> - const GLint *v = (const GLint *) values;
>> - for (i = 0; i < count * elems; i++) {
>> - printf("%d ", v[i]);
>> - }
>> - }
>> - else if (basicType == GL_UNSIGNED_INT) {
>> - const GLuint *v = (const GLuint *) values;
>> - for (i = 0; i < count * elems; i++) {
>> - printf("%u ", v[i]);
>> - }
>> - }
>> - else {
>> - const GLfloat *v = (const GLfloat *) values;
>> - assert(basicType == GL_FLOAT);
>> - for (i = 0; i < count * elems; i++) {
>> - printf("%g ", v[i]);
>> + log_uniform(values, basicType, components, 1, count,
>> + false, shProg, location, uni);
>> + }
>> +
>> + /* Validate the texture unit setting.
>> + */
>> + /* FINISHME: I cannot find any justification for this in the GL spec.
>> + */
>> + if (uni->type->is_sampler()) {
>> + for (i = 0; i < count; i++) {
>> + const unsigned texUnit = ((unsigned *) values)[i];
>> +
>> + /* check that the sampler (tex unit index) is legal */
>> + if (texUnit >= ctx->Const.MaxCombinedTextureImageUnits) {
>> + _mesa_error(ctx, GL_INVALID_VALUE,
>> + "glUniform1i(invalid sampler/tex unit index for "
>> + "uniform %d)",
>> + location);
>> + return;
>> }
>> }
>> - printf("\n");
>> }
>>
>> - /* A uniform var may be used by both a vertex shader and a fragment
>> - * shader. We may need to update one or both shader's uniform here:
>> + /* Page 82 (page 96 of the PDF) of the OpenGL 2.1 spec says:
>> + *
>> + * "When loading N elements starting at an arbitrary position k in a
>> + * uniform declared as an array, elements k through k + N - 1 in the
>> + * array will be replaced with the new values. Values for any array
>> + * element that exceeds the highest array element index used, as
>> + * reported by GetActiveUniform, will be ignored by the GL."
>> + *
>> + * Clamp 'count' to a valid value. Note that for non-arrays a count > 1
>> + * will have already generated an error.
>> */
>> - if (shProg->_LinkedShaders[MESA_SHADER_VERTEX]) {
>> - /* convert uniform location to program parameter index */
>> - GLint index = uniform->VertPos;
>> - if (index >= 0) {
>> - set_program_uniform(ctx,
>> - shProg->_LinkedShaders[MESA_SHADER_VERTEX]->Program,
>> - index, offset, type, count, elems, values);
>> - }
>> + if (uni->array_elements != 0) {
>> + if (offset >= uni->array_elements)
>> + return;
>> +
>> + count = MIN2(count, (uni->array_elements - offset));
>> }
>>
>> - if (shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
>> - /* convert uniform location to program parameter index */
>> - GLint index = uniform->FragPos;
>> - if (index >= 0) {
>> - set_program_uniform(ctx,
>> - shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program,
>> - index, offset, type, count, elems, values);
>> + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
>> +
>> + /* Store the data in the "actual type" backing storage for the uniform.
>> + */
>> + if (!uni->type->is_boolean()) {
>> + memcpy(&uni->storage[components * offset], values,
>> + sizeof(uni->storage[0]) * components * count);
>> + } else {
>> + const union gl_constant_value *src =
>> + (const union gl_constant_value *) values;
>> + union gl_constant_value *dst = &uni->storage[components * offset];
>> + const unsigned elems = components * count;
>> +
>> + for (i = 0; i < elems; i++) {
>> + if (basicType == GLSL_TYPE_FLOAT) {
>> + dst[i].i = src[i].f != 0.0f ? 1 : 0;
>> + } else {
>> + dst[i].i = src[i].i != 0 ? 1 : 0;
>> + }
>> }
>> }
>>
>> - if (shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]) {
>> - /* convert uniform location to program parameter index */
>> - GLint index = uniform->GeomPos;
>> - if (index >= 0) {
>> - set_program_uniform(ctx,
>> - shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program,
>> - index, offset, type, count, elems, values);
>> + uni->initialized = true;
>> + uni->dirty = true;
>> +
>> + _mesa_propagate_uniforms_to_driver_storage(uni, offset, count);
>> +
>> + /* If the uniform is a sampler, do the extra magic necessary to propagate
>> + * the changes through.
>> + */
>> + if (uni->type->is_sampler()) {
>> + for (i = 0; i < count; i++) {
>> + shProg->SamplerUnits[uni->sampler + offset + i] =
>> + ((unsigned *) values)[i];
>> }
>> - }
>>
>> - uniform->Initialized = GL_TRUE;
>> + bool flushed = false;
>> + for (i = 0; i < MESA_SHADER_TYPES; i++) {
>> + struct gl_program *prog;
>> +
>> + if (shProg->_LinkedShaders[i] == NULL)
>> + continue;
>> +
>> + prog = shProg->_LinkedShaders[i]->Program;
>> +
>> + assert(sizeof(prog->SamplerUnits) == sizeof(shProg->SamplerUnits));
>> +
>> + if (memcmp(prog->SamplerUnits,
>> + shProg->SamplerUnits,
>> + sizeof(shProg->SamplerUnits)) != 0) {
>> + if (!flushed) {
>> + FLUSH_VERTICES(ctx, _NEW_TEXTURE | _NEW_PROGRAM);
>> + flushed = true;
>> + }
>> +
>> + memcpy(prog->SamplerUnits,
>> + shProg->SamplerUnits,
>> + sizeof(shProg->SamplerUnits));
>> +
>> + _mesa_update_shader_textures_used(prog);
>> + (void) ctx->Driver.ProgramStringNotify(ctx, prog->Target, prog);
>
> Hi Ian,
>
> This ProgramStringNotify call causes excessive shader recompilations
> in the game Cogs (from some Humble Bundle). Approximately 25% of
> in-game CPU time is spent on translating shaders and register
> allocation. If the shaders were larger, it would be much worse.
> Moreover, I have realized calling ProgramStringNotify is not needed
> for st/mesa in this case at all (I just comment it out if I want
> higher frame rates), because it doesn't use the prog->SamplerUnits
> array when translating shaders. The array is only used when we set
> samplers, so setting _NEW_TEXTURE is sufficient. It seems that i965
> could do the same thing. Another option would be to cache precompiled
> shaders, but that seems more complicated than doing what st/mesa does.
>
> Marek
Yeah, I noticed that a while back too, but we never arrived at a
solution and it slipped off my radar. I agree completely,
ProgramStringNotify is completely overkill and bogus.
We actually already rely on _NEW_TEXTURE for all shader programs. Since
the uniform code already flags _NEW_TEXTURE, we should be able to just
add the SamplerUnits array to the program key (specifically,
brw_sampler_prog_key_data), and that should take care of any state
dependent recompiles. Then we wouldn't need ProgramStringNotify at all.
Right?
More information about the mesa-dev
mailing list