[Mesa-dev] [PATCH] i965: Store uniform constant values in a gl_constant_value instead of float

Roland Scheidegger sroland at vmware.com
Tue Aug 12 11:21:24 PDT 2014


This looks good to me. However I'm wondering if it would be better to
use a generic float/int union. I guess though these values actually
really are gl_constant_value type (as they come as gl params) so this
looks good.

Roland

Am 12.08.2014 20:04, schrieb Neil Roberts:
> Hi,
> 
> Here is a replacement patch for the one to use memcpy when copying
> uniform values into the batch buffer here:
> 
> https://urldefense.proofpoint.com/v1/url?u=http://lists.freedesktop.org/archives/mesa-dev/2014-August/065179.html&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0A&m=ncVDxLJN0VB1NhIOJ5egzyvs0mgOh%2BTUNiisFv%2BIDIk%3D%0A&s=f4b3fa9e5baf4d613e672b238b05ed84529aaa97eac86244a3325912d85b5e9d
> 
> Eric Anholt suggested instead of using memcpy we should change the
> type of the pointer to uint32_t. I started to implement that idea but
> I found there were quite a few bits of debugging code that are trying
> to print the values as floats. I thought it might be cleaner if we
> make the pointers gl_constant_value instead because that will avoid
> having ugly casts in this debugging code and also makes it more
> obvious that the values are being overloaded.
> 
> This patch also ends up fixing a few other potential problems in
> places where we are copying floatings for example when copying into
> the pull params.
> 
> I've run the patch through all of the Piglit tests on Ivybridge with a
> 32-bit build and it doesn't cause any changes except to fix the five
> test cases mentioned in the bug report.
> 
> - Neil
> 
> ------- >8 --------------- (use git am --scissors to automatically chop here)
> 
> The brw_stage_prog_data struct previously contained an array of float pointers
> to the values of parameters. These were then copied into a batch buffer to
> upload the values using a regular assignment. However the float values were
> also being overloaded to store integer values for integer uniforms. This can
> break if x87 floating-point registers are used to do the assignment because
> the fst instruction tries to fix up invalid float values. If an integer
> constant happened to look like an invalid float value then it would get
> altered when it was copied into the batch buffer.
> 
> This patch changes the pointers to be gl_constant_value instead so that the
> assignment should end up copying without any alteration. This also makes it
> more obvious that the values being stored here are overloaded for multiple
> types.
> 
> There are some static asserts where the values are uploaded to ensure that the
> size of gl_constant_value is the same as a float.
> 
> Bugzilla: https://urldefense.proofpoint.com/v1/url?u=https://bugs.freedesktop.org/show_bug.cgi?id%3D81150&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0A&m=ncVDxLJN0VB1NhIOJ5egzyvs0mgOh%2BTUNiisFv%2BIDIk%3D%0A&s=806b5a3faa9c146303663d1d35b76fc847fb83ba5f7f3adb2a238f2e1b64ec37
> ---
>  src/mesa/drivers/dri/i965/brw_context.h          |  5 +++--
>  src/mesa/drivers/dri/i965/brw_curbe.c            | 22 ++++++++++++----------
>  src/mesa/drivers/dri/i965/brw_fs.cpp             |  6 +++---
>  src/mesa/drivers/dri/i965/brw_fs_fp.cpp          |  2 +-
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp     |  6 +++---
>  src/mesa/drivers/dri/i965/brw_vec4.cpp           |  6 +++---
>  src/mesa/drivers/dri/i965/brw_vec4_gs.c          |  4 ++--
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 13 ++++++++-----
>  src/mesa/drivers/dri/i965/brw_vec4_vp.cpp        |  4 ++--
>  src/mesa/drivers/dri/i965/brw_vs.c               |  6 ++++--
>  src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 10 ++++++----
>  src/mesa/drivers/dri/i965/brw_wm.c               |  5 +++--
>  src/mesa/drivers/dri/i965/gen6_vs_state.c        |  8 +++++---
>  13 files changed, 55 insertions(+), 42 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 1bbcf46..157a605 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -41,6 +41,7 @@
>  #include "main/mtypes.h"
>  #include "brw_structs.h"
>  #include "intel_aub.h"
> +#include "program/prog_parameter.h"
>  
>  #ifdef __cplusplus
>  extern "C" {
> @@ -309,8 +310,8 @@ struct brw_stage_prog_data {
>      * These must be the last fields of the struct (see
>      * brw_stage_prog_data_compare()).
>      */
> -   const float **param;
> -   const float **pull_param;
> +   const gl_constant_value **param;
> +   const gl_constant_value **pull_param;
>  };
>  
>  /* Data about a particular attempt to compile a program.  Note that
> diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
> index 02eda5f..1a828ed 100644
> --- a/src/mesa/drivers/dri/i965/brw_curbe.c
> +++ b/src/mesa/drivers/dri/i965/brw_curbe.c
> @@ -196,7 +196,7 @@ brw_upload_constant_buffer(struct brw_context *brw)
>     /* BRW_NEW_CURBE_OFFSETS */
>     const GLuint sz = brw->curbe.total_size;
>     const GLuint bufsz = sz * 16 * sizeof(GLfloat);
> -   GLfloat *buf;
> +   gl_constant_value *buf;
>     GLuint i;
>     gl_clip_plane *clip_planes;
>  
> @@ -207,6 +207,8 @@ brw_upload_constant_buffer(struct brw_context *brw)
>     buf = intel_upload_space(brw, bufsz, 64,
>                              &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);
>  
> +   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
> +
>     /* fragment shader constants */
>     if (brw->curbe.wm_size) {
>        /* BRW_NEW_CURBE_OFFSETS */
> @@ -226,10 +228,10 @@ brw_upload_constant_buffer(struct brw_context *brw)
>        /* If any planes are going this way, send them all this way:
>         */
>        for (i = 0; i < 6; i++) {
> -	 buf[offset + i * 4 + 0] = fixed_plane[i][0];
> -	 buf[offset + i * 4 + 1] = fixed_plane[i][1];
> -	 buf[offset + i * 4 + 2] = fixed_plane[i][2];
> -	 buf[offset + i * 4 + 3] = fixed_plane[i][3];
> +	 buf[offset + i * 4 + 0].f = fixed_plane[i][0];
> +	 buf[offset + i * 4 + 1].f = fixed_plane[i][1];
> +	 buf[offset + i * 4 + 2].f = fixed_plane[i][2];
> +	 buf[offset + i * 4 + 3].f = fixed_plane[i][3];
>        }
>  
>        /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
> @@ -238,10 +240,10 @@ brw_upload_constant_buffer(struct brw_context *brw)
>        clip_planes = brw_select_clip_planes(ctx);
>        for (j = 0; j < MAX_CLIP_PLANES; j++) {
>  	 if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
> -	    buf[offset + i * 4 + 0] = clip_planes[j][0];
> -	    buf[offset + i * 4 + 1] = clip_planes[j][1];
> -	    buf[offset + i * 4 + 2] = clip_planes[j][2];
> -	    buf[offset + i * 4 + 3] = clip_planes[j][3];
> +	    buf[offset + i * 4 + 0].f = clip_planes[j][0];
> +	    buf[offset + i * 4 + 1].f = clip_planes[j][1];
> +	    buf[offset + i * 4 + 2].f = clip_planes[j][2];
> +	    buf[offset + i * 4 + 3].f = clip_planes[j][3];
>  	    i++;
>  	 }
>        }
> @@ -260,7 +262,7 @@ brw_upload_constant_buffer(struct brw_context *brw)
>     if (0) {
>        for (i = 0; i < sz*16; i+=4)
>  	 fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
> -                 buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
> +                 buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f);
>     }
>  
>     /* Because this provokes an action (ie copy the constants into the
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 3aee822..8573a3d 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -961,7 +961,7 @@ fs_visitor::setup_uniform_values(ir_variable *ir)
>           slots *= storage->array_elements;
>  
>        for (unsigned i = 0; i < slots; i++) {
> -         stage_prog_data->param[uniforms++] = &storage->storage[i].f;
> +         stage_prog_data->param[uniforms++] = &storage->storage[i];
>        }
>     }
>  
> @@ -1000,7 +1000,7 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
>  	 last_swiz = swiz;
>  
>           stage_prog_data->param[uniforms++] =
> -            &fp->Base.Parameters->ParameterValues[index][swiz].f;
> +            &fp->Base.Parameters->ParameterValues[index][swiz];
>        }
>     }
>  }
> @@ -1806,7 +1806,7 @@ fs_visitor::move_uniform_array_access_to_pull_constants()
>            * add it.
>            */
>           if (pull_constant_loc[uniform] == -1) {
> -            const float **values = &stage_prog_data->param[uniform];
> +            const gl_constant_value **values = &stage_prog_data->param[uniform];
>  
>              assert(param_size[uniform]);
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> index 8d07be2..98df299 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> @@ -583,7 +583,7 @@ fs_visitor::setup_fp_regs()
>             p < prog->Parameters->NumParameters; p++) {
>           for (unsigned int i = 0; i < 4; i++) {
>              stage_prog_data->param[uniforms++] =
> -               &prog->Parameters->ParameterValues[p][i].f;
> +               &prog->Parameters->ParameterValues[p][i];
>           }
>        }
>     }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index c16401b..2d5f18d 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -1639,7 +1639,7 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
>        /* Try to find existing copies of the texrect scale uniforms. */
>        for (unsigned i = 0; i < uniforms; i++) {
>           if (stage_prog_data->param[i] ==
> -             &prog->Parameters->ParameterValues[index][0].f) {
> +             &prog->Parameters->ParameterValues[index][0]) {
>              scale_x = fs_reg(UNIFORM, i);
>              scale_y = fs_reg(UNIFORM, i + 1);
>              break;
> @@ -1652,9 +1652,9 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
>           scale_y = fs_reg(UNIFORM, uniforms + 1);
>  
>           stage_prog_data->param[uniforms++] =
> -            &prog->Parameters->ParameterValues[index][0].f;
> +            &prog->Parameters->ParameterValues[index][0];
>           stage_prog_data->param[uniforms++] =
> -            &prog->Parameters->ParameterValues[index][1].f;
> +            &prog->Parameters->ParameterValues[index][1];
>        }
>     }
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 9a73f8f..b9dfc37 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -667,7 +667,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
>        pull_constant_loc[i / 4] = -1;
>  
>        if (i >= max_uniform_components) {
> -	 const float **values = &stage_prog_data->param[i];
> +	 const gl_constant_value **values = &stage_prog_data->param[i];
>  
>  	 /* Try to find an existing copy of this uniform in the pull
>  	  * constants if it was part of an array access already.
> @@ -1490,10 +1490,10 @@ vec4_visitor::setup_uniforms(int reg)
>        this->uniform_vector_size[this->uniforms] = 1;
>  
>        stage_prog_data->param =
> -         reralloc(NULL, stage_prog_data->param, const float *, 4);
> +         reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4);
>        for (unsigned int i = 0; i < 4; i++) {
>  	 unsigned int slot = this->uniforms * 4 + i;
> -	 static float zero = 0.0;
> +	 static gl_constant_value zero = { .f = 0.0 };
>  	 stage_prog_data->param[slot] = &zero;
>        }
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> index 6428291..8daa555 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> @@ -65,9 +65,9 @@ do_gs_prog(struct brw_context *brw,
>     param_count += MAX_CLIP_PLANES * 4;
>  
>     c.prog_data.base.base.param =
> -      rzalloc_array(NULL, const float *, param_count);
> +      rzalloc_array(NULL, const gl_constant_value *, param_count);
>     c.prog_data.base.base.pull_param =
> -      rzalloc_array(NULL, const float *, param_count);
> +      rzalloc_array(NULL, const gl_constant_value *, param_count);
>     /* Setting nr_params here NOT to the size of the param and pull_param
>      * arrays, but to the number of uniform components vec4_visitor
>      * needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index 1b46850..4863cae 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -691,11 +691,11 @@ vec4_visitor::setup_uniform_values(ir_variable *ir)
>  
>           int i;
>           for (i = 0; i < uniform_vector_size[uniforms]; i++) {
> -            stage_prog_data->param[uniforms * 4 + i] = &components->f;
> +            stage_prog_data->param[uniforms * 4 + i] = components;
>              components++;
>           }
>           for (; i < 4; i++) {
> -            static float zero = 0;
> +            static gl_constant_value zero = { .f = 0.0 };
>              stage_prog_data->param[uniforms * 4 + i] = &zero;
>           }
>  
> @@ -715,7 +715,8 @@ vec4_visitor::setup_uniform_clipplane_values()
>        this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
>        this->userplane[i].type = BRW_REGISTER_TYPE_F;
>        for (int j = 0; j < 4; ++j) {
> -         stage_prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j];
> +         stage_prog_data->param[this->uniforms * 4 + j] =
> +            (gl_constant_value *) &clip_planes[i][j];
>        }
>        ++this->uniforms;
>     }
> @@ -739,7 +740,8 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
>         */
>        int index = _mesa_add_state_reference(this->prog->Parameters,
>  					    (gl_state_index *)slots[i].tokens);
> -      float *values = &this->prog->Parameters->ParameterValues[index][0].f;
> +      gl_constant_value *values =
> +         &this->prog->Parameters->ParameterValues[index][0];
>  
>        assert(this->uniforms < uniform_array_size);
>        this->uniform_vector_size[this->uniforms] = 0;
> @@ -3309,7 +3311,8 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
>  	  * add it.
>  	  */
>  	 if (pull_constant_loc[uniform] == -1) {
> -	    const float **values = &stage_prog_data->param[uniform * 4];
> +	    const gl_constant_value **values =
> +               &stage_prog_data->param[uniform * 4];
>  
>  	    pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4;
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
> index f9c23ca..610fea2 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
> @@ -401,7 +401,7 @@ vec4_vs_visitor::emit_program_code()
>        unsigned i;
>        for (i = 0; i < params->NumParameters * 4; i++) {
>           stage_prog_data->pull_param[i] =
> -            &params->ParameterValues[i / 4][i % 4].f;
> +            &params->ParameterValues[i / 4][i % 4];
>        }
>        stage_prog_data->nr_pull_params = i;
>     }
> @@ -432,7 +432,7 @@ vec4_vs_visitor::setup_vp_regs()
>        this->uniform_vector_size[this->uniforms] = components;
>        for (unsigned i = 0; i < 4; i++) {
>           stage_prog_data->param[this->uniforms * 4 + i] = i >= components
> -            ? 0 : &plist->ParameterValues[p][i].f;
> +            ? 0 : &plist->ParameterValues[p][i];
>        }
>        this->uniforms++; /* counted in vec4 units */
>     }
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
> index 19b1d3b..4574c3e 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
> @@ -233,8 +233,10 @@ do_vs_prog(struct brw_context *brw,
>      */
>     param_count += c.key.base.nr_userclip_plane_consts * 4;
>  
> -   stage_prog_data->param = rzalloc_array(NULL, const float *, param_count);
> -   stage_prog_data->pull_param = rzalloc_array(NULL, const float *, param_count);
> +   stage_prog_data->param =
> +      rzalloc_array(NULL, const gl_constant_value *, param_count);
> +   stage_prog_data->pull_param =
> +      rzalloc_array(NULL, const gl_constant_value *, param_count);
>  
>     /* Setting nr_params here NOT to the size of the param and pull_param
>      * arrays, but to the number of uniform components vec4_visitor
> diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
> index ef4a77a..1cc96cf 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
> @@ -76,8 +76,10 @@ brw_upload_pull_constants(struct brw_context *brw,
>     uint32_t size = prog_data->nr_pull_params * 4;
>     drm_intel_bo *const_bo = NULL;
>     uint32_t const_offset;
> -   float *constants = intel_upload_space(brw, size, 64,
> -                                         &const_bo, &const_offset);
> +   gl_constant_value *constants = intel_upload_space(brw, size, 64,
> +                                                     &const_bo, &const_offset);
> +
> +   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
>  
>     for (i = 0; i < prog_data->nr_pull_params; i++) {
>        constants[i] = *prog_data->pull_param[i];
> @@ -85,9 +87,9 @@ brw_upload_pull_constants(struct brw_context *brw,
>  
>     if (0) {
>        for (i = 0; i < ALIGN(prog_data->nr_pull_params, 4) / 4; i++) {
> -	 float *row = &constants[i * 4];
> +	 const gl_constant_value *row = &constants[i * 4];
>  	 fprintf(stderr, "const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
> -                 i, row[0], row[1], row[2], row[3]);
> +                 i, row[0].f, row[1].f, row[2].f, row[3].f);
>        }
>     }
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
> index 6e068d3..2e3cd4b 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -169,9 +169,10 @@ bool do_wm_prog(struct brw_context *brw,
>     }
>     /* The backend also sometimes adds params for texture size. */
>     param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
> -   prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
> +   prog_data.base.param =
> +      rzalloc_array(NULL, const gl_constant_value *, param_count);
>     prog_data.base.pull_param =
> -   rzalloc_array(NULL, const float *, param_count);
> +      rzalloc_array(NULL, const gl_constant_value *, param_count);
>     prog_data.base.nr_params = param_count;
>  
>     prog_data.barycentric_interp_modes =
> diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
> index 905e123..77f566c 100644
> --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
> @@ -67,13 +67,15 @@ gen6_upload_push_constants(struct brw_context *brw,
>     if (prog_data->nr_params == 0) {
>        stage_state->push_const_size = 0;
>     } else {
> -      float *param;
> +      gl_constant_value *param;
>        int i;
>  
>        param = brw_state_batch(brw, type,
> -			      prog_data->nr_params * sizeof(float),
> +			      prog_data->nr_params * sizeof(gl_constant_value),
>  			      32, &stage_state->push_const_offset);
>  
> +      STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
> +
>        /* _NEW_PROGRAM_CONSTANTS
>         *
>         * Also _NEW_TRANSFORM -- we may reference clip planes other than as a
> @@ -91,7 +93,7 @@ gen6_upload_push_constants(struct brw_context *brw,
>  	    if ((i & 7) == 0)
>  	       fprintf(stderr, "g%d: ",
>                         prog_data->dispatch_grf_start_reg + i / 8);
> -	    fprintf(stderr, "%8f ", param[i]);
> +	    fprintf(stderr, "%8f ", param[i].f);
>  	    if ((i & 7) == 7)
>  	       fprintf(stderr, "\n");
>  	 }
> 



More information about the mesa-dev mailing list