[Mesa-dev] [PATCH 56/59] i965/fs: align access to double-based uniforms in push constant buffer
Pohjolainen, Topi
topi.pohjolainen at intel.com
Mon May 2 13:39:19 UTC 2016
On Mon, May 02, 2016 at 04:28:18PM +0300, Pohjolainen, Topi wrote:
> On Fri, Apr 29, 2016 at 01:29:53PM +0200, Samuel Iglesias Gons?lvez wrote:
> > When there is a mix of definitions of uniforms with 32-bit or 64-bit
> > data type sizes, the driver ends up doing misaligned access to double
> > based variables in the push constant buffer.
> >
> > To fix this, the driver adds padding when needed in the push constant
> > buffer and takes it into account to avoid exceeding its limits and to
> > update the GRF registers to access uniform variables.
> >
> > v2 (Iago):
> > - Renamed some variables for clarity.
> > - Replace the boolean array of paddings that tracked if each param was
> > padded by an integer array that keeps count of accumnulated padding.
> > This allows us to remove a couple of loops that had to compute that.
> > - Reworked things a bit so we can get rid of the nr_paddings field in
> > brw_stage_prog_data.
> > - Use rzalloc_array instead or ralloc_array and memset.
> > - Fixed wrong indentation.
> >
> > Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
> > Signed-off-by: Iago Toral Quiroga <itoral at igalia.com>
> > ---
> > src/mesa/drivers/dri/i965/brw_compiler.h | 8 ++++
> > src/mesa/drivers/dri/i965/brw_fs.cpp | 50 ++++++++++++++++++++++---
> > src/mesa/drivers/dri/i965/brw_program.c | 1 +
> > src/mesa/drivers/dri/i965/brw_wm.c | 2 +
> > src/mesa/drivers/dri/i965/gen6_constant_state.c | 12 ++++--
> > 5 files changed, 64 insertions(+), 9 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
> > index 5807305..bc15bf3 100644
> > --- a/src/mesa/drivers/dri/i965/brw_compiler.h
> > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
> > @@ -333,6 +333,14 @@ struct brw_stage_prog_data {
> > } binding_table;
> >
> > GLuint nr_params; /**< number of float params/constants */
> > + /**
> > + * Accumulated padding (in 32-bit units) in the push constant buffer for
> > + * each param. If param_padding[n] = P, it means that params 0..n required
> > + * a total accumulated padding of P 32-bit slots. This is useful to compute
> > + * the actual location, including padding, for each param.
> > + */
> > + uint32_t *param_padding;
> > +
> > GLuint nr_pull_params;
> > unsigned nr_image_params;
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > index 53f709b..5aa6ded 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > @@ -1571,7 +1571,8 @@ fs_visitor::assign_curb_setup()
> > int uniform_nr = inst->src[i].nr + inst->src[i].reg_offset;
> > int constant_nr;
> > if (uniform_nr >= 0 && uniform_nr < (int) uniforms) {
> > - constant_nr = push_constant_loc[uniform_nr];
> > + constant_nr = push_constant_loc[uniform_nr] +
> > + stage_prog_data->param_padding[uniform_nr];
> > } else {
> > /* Section 5.11 of the OpenGL 4.1 spec says:
> > * "Out-of-bounds reads return undefined values, which include
> > @@ -2010,7 +2011,10 @@ fs_visitor::assign_constant_locations()
> > return;
> >
> > bool is_live[uniforms];
> > + bool is_64bit[uniforms];
> > +
> > memset(is_live, 0, sizeof(is_live));
> > + memset(is_64bit, 0, sizeof(is_64bit));
> >
> > /* For each uniform slot, a value of true indicates that the given slot and
> > * the next slot must remain contiguous. This is used to keep us from
> > @@ -2047,8 +2051,11 @@ fs_visitor::assign_constant_locations()
> > is_live[last] = true;
> > } else {
> > if (constant_nr >= 0 && constant_nr < (int) uniforms) {
> > - for (int j = 0; j < inst->regs_read(i); j++)
> > + for (int j = 0; j < inst->regs_read(i); j++) {
> > is_live[constant_nr + j] = true;
> > + if (type_sz(inst->src[i].type) == 8)
> > + is_64bit[constant_nr + j] = true;
> > + }
> > }
> > }
> > }
> > @@ -2072,14 +2079,18 @@ fs_visitor::assign_constant_locations()
> >
> > unsigned int num_push_constants = 0;
> > unsigned int num_pull_constants = 0;
> > + unsigned num_paddings = 0;
> > + bool is_64bit_aligned = true;
> >
> > push_constant_loc = ralloc_array(mem_ctx, int, uniforms);
> > pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
> > + stage_prog_data->param_padding = rzalloc_array(NULL, uint32_t, uniforms);
> >
> > int chunk_start = -1;
> > for (unsigned u = 0; u < uniforms; u++) {
> > push_constant_loc[u] = -1;
> > pull_constant_loc[u] = -1;
> > + stage_prog_data->param_padding[u] = num_paddings;
> >
> > if (!is_live[u])
> > continue;
> > @@ -2094,17 +2105,44 @@ fs_visitor::assign_constant_locations()
> > */
> > if (!contiguous[u]) {
> > unsigned chunk_size = u - chunk_start + 1;
> > + unsigned total_push_components = num_push_constants + num_paddings;
> > + bool push_32_bits_constant = !is_64bit[u] &&
> > + total_push_components + chunk_size <= max_push_components;
> > + /* Verify if the 64 bits constant can be saved into the push constant
> > + * buffer. Take into account that it might need padding.
> > + */
> > + bool push_64_bits_constant = is_64bit[u] &&
> > + (total_push_components + chunk_size + 1 + !is_64bit_aligned) <=
> > + max_push_components;
> >
> > /* Decide whether we should push or pull this parameter. In the
> > * Vulkan driver, push constants are explicitly exposed via the API
> > * so we push everything. In GL, we only push small arrays.
> > */
> > if (stage_prog_data->pull_param == NULL ||
> > - (num_push_constants + chunk_size <= max_push_components &&
> > + ((push_32_bits_constant || push_64_bits_constant) &&
> > chunk_size <= max_chunk_size)) {
> > - assert(num_push_constants + chunk_size <= max_push_components);
> > - for (unsigned j = chunk_start; j <= u; j++)
> > + assert(total_push_components + chunk_size <= max_push_components);
> > +
> > + for (unsigned j = chunk_start; j <= u; j++) {
> > + /* If it is a double and it is not aligned to 64 bits, add padding.
> > + * It is already aligned, don't add padding.
> > + */
> > + if (push_64_bits_constant) {
> > + if (!is_64bit_aligned) {
> > + num_paddings++;
> > + stage_prog_data->param_padding[j] = num_paddings;
> > + is_64bit_aligned = true;
> > + }
> > + } else {
> > + /* We are pushing a 32 bits constant. Is the next push constant
> > + * aligned 64 bits?
> > + */
> > + is_64bit_aligned = !is_64bit_aligned;
> > + }
> > +
> > push_constant_loc[j] = num_push_constants++;
> > + }
>
> I am probably greatly over simplifying but I have to ask. Could we simply
> add a hole in the push file by increasing the counter? After all it is
> push_constant_loc[] that controls in the end the offset in the instructions
> (after running assign_curb_setup()) and tells the loader where to copy the
> data from the uniform storage. Something of this sort:
>
> if (!contiguous[u]) {
> unsigned chunk_size = u - chunk_start + 1;
>
> + if (is_64bit[u] && num_push_constants % 2)
> + num_push_constants++;
>
> /* Decide whether we should push or pull this parameter. In the
> * Vulkan driver, push constants are explicitly exposed via the API
> * so we push everything. In GL, we only push small arrays.
> */
> if (stage_prog_data->pull_param == NULL ||
> (num_push_constants + chunk_size <= max_push_components &&
> chunk_size <= max_chunk_size)) {
> assert(num_push_constants + chunk_size <= max_push_components);
> for (unsigned j = chunk_start; j <= u; j++)
> push_constant_loc[j] = num_push_constants++;
>
>
> > } else {
> > for (unsigned j = chunk_start; j <= u; j++)
> > pull_constant_loc[j] = num_pull_constants++;
> > @@ -2114,7 +2152,7 @@ fs_visitor::assign_constant_locations()
> > }
> > }
> >
> > - stage_prog_data->nr_params = num_push_constants;
> > + stage_prog_data->nr_params = num_push_constants + num_paddings;
> > stage_prog_data->nr_pull_params = num_pull_constants;
> >
> > /* Up until now, the param[] array has been indexed by reg + reg_offset
> > diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
> > index 3112c0c..2d145cd 100644
> > --- a/src/mesa/drivers/dri/i965/brw_program.c
> > +++ b/src/mesa/drivers/dri/i965/brw_program.c
> > @@ -555,6 +555,7 @@ brw_stage_prog_data_free(const void *p)
> > struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;
> >
> > ralloc_free(prog_data->param);
> > + ralloc_free(prog_data->param_padding);
> > ralloc_free(prog_data->pull_param);
> > ralloc_free(prog_data->image_param);
> > }
> > diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
> > index dbc626c..025d484 100644
> > --- a/src/mesa/drivers/dri/i965/brw_wm.c
> > +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> > @@ -103,6 +103,8 @@ brw_codegen_wm_prog(struct brw_context *brw,
> > param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
> > prog_data.base.param =
> > rzalloc_array(NULL, const gl_constant_value *, param_count);
> > + prog_data.base.param_padding =
> > + rzalloc_array(NULL, uint32_t, param_count);
> > prog_data.base.pull_param =
> > rzalloc_array(NULL, const gl_constant_value *, param_count);
> > prog_data.base.image_param =
> > diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c
> > index 6c0c32b..3d0caf2 100644
> > --- a/src/mesa/drivers/dri/i965/gen6_constant_state.c
> > +++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c
> > @@ -135,7 +135,9 @@ gen6_upload_push_constants(struct brw_context *brw,
> > _mesa_load_state_parameters(ctx, prog->Parameters);
> >
> > gl_constant_value *param;
> > - int i;
> > + int i, p;
> > + gl_constant_value zero;
> > + zero.u = 0;
> >
> > param = brw_state_batch(brw, type,
> > prog_data->nr_params * sizeof(gl_constant_value),
> > @@ -149,8 +151,12 @@ gen6_upload_push_constants(struct brw_context *brw,
> > * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS
> > * wouldn't be set for them.
> > */
> > - for (i = 0; i < prog_data->nr_params; i++) {
> > - param[i] = *prog_data->param[i];
There would be corresponding holes in param[] and therefore this would become:
if (prog_data->param[i]) {
param[i] = *prog_data->param[i];
} else {
param[i] = zero;
}
> > + for (i = 0, p = 0; i < prog_data->nr_params; i++) {
> > + if (p > 0 && prog_data->param_padding &&
> > + prog_data->param_padding[p] > prog_data->param_padding[p - 1]) {
> > + param[i++] = zero;
> > + }
> > + param[i] = *prog_data->param[p++];
> > }
> >
> > if (0) {
> > --
> > 2.5.0
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list