[Mesa-dev] [PATCH v2 56/59] i965/fs: push first double-based uniforms in push constant buffer

Samuel Iglesias Gonsálvez siglesias at igalia.com
Fri May 6 07:57:13 UTC 2016


-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256

Kenneth gave his R-b to this patch on IRC:

<Kayden> "i965/fs: push first double-based uniforms in push constant
buffer" gets my R-b

On 06/05/16 08:56, Samuel Iglesias Gonsálvez wrote:
> When there is a mix of definitions of uniforms with 32-bit or
> 64-bit data type sizes, the driver ends up doing misaligned access
> to double based variables in the push constant buffer.
> 
> To fix this, this patch pushes first all the 64-bit variables and 
> then the rest. Then, all the variables would be aligned to its data
> type size.
> 
> Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com> 
> --- src/mesa/drivers/dri/i965/brw_fs.cpp | 113
> +++++++++++++++++++++++++---------- 1 file changed, 83
> insertions(+), 30 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp index 65aa3c7..7eaf1b4
> 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++
> b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -39,6 +39,7 @@ #include
> "brw_program.h" #include "brw_dead_control_flow.h" #include
> "compiler/glsl_types.h" +#include "program/prog_parameter.h"
> 
> using namespace brw;
> 
> @@ -2004,6 +2005,45 @@ fs_visitor::compact_virtual_grfs() return
> progress; }
> 
> +static void +set_push_pull_constant_loc(unsigned uniform, int
> &chunk_start, bool contiguous, +                           int
> *push_constant_loc, int *pull_constant_loc, +
> unsigned &num_push_constants, +                           unsigned
> &num_pull_constants, +                           const unsigned
> max_push_components, +                           const unsigned
> max_chunk_size, +                           struct
> brw_stage_prog_data *stage_prog_data) +{ +   /* This is the first
> live uniform in the chunk */ +   if (chunk_start < 0) +
> chunk_start = uniform; + +   /* If this element does not need to be
> contiguous with the next, we +    * split at this point and
> everthing between chunk_start and u forms a +    * single chunk. +
> */ +   if (!contiguous) { +      unsigned chunk_size = uniform -
> chunk_start + 1; + +      /* Decide whether we should push or pull
> this parameter.  In the +       * Vulkan driver, push constants are
> explicitly exposed via the API +       * so we push everything.  In
> GL, we only push small arrays. +       */ +      if
> (stage_prog_data->pull_param == NULL || +
> (num_push_constants + chunk_size <= max_push_components && +
> chunk_size <= max_chunk_size)) { +
> assert(num_push_constants + chunk_size <= max_push_components); +
> for (unsigned j = chunk_start; j <= uniform; j++) +
> push_constant_loc[j] = num_push_constants++; +      } else { +
> for (unsigned j = chunk_start; j <= uniform; j++) +
> pull_constant_loc[j] = num_pull_constants++; +      } + +
> chunk_start = -1; +   } +} + /** * Assign UNIFORM file registers to
> either push constants or pull constants. * @@ -2022,6 +2062,8 @@
> fs_visitor::assign_constant_locations()
> 
> bool is_live[uniforms]; memset(is_live, 0, sizeof(is_live)); +
> bool is_live_64bit[uniforms]; +   memset(is_live_64bit, 0,
> sizeof(is_live_64bit));
> 
> /* For each uniform slot, a value of true indicates that the given
> slot and * the next slot must remain contiguous.  This is used to
> keep us from @@ -2054,14 +2096,21 @@
> fs_visitor::assign_constant_locations() for (unsigned j =
> constant_nr; j < last; j++) { is_live[j] = true; contiguous[j] =
> true; +               if (type_sz(inst->src[i].type) == 8) { +
> is_live_64bit[j] = true; +               } } is_live[last] = true; 
> } else { if (constant_nr >= 0 && constant_nr < (int) uniforms) { 
> int regs_read = inst->components_read(i) * 
> type_sz(inst->src[i].type) / 4; -               for (int j = 0; j <
> regs_read; j++) +               for (int j = 0; j < regs_read; j++)
> { is_live[constant_nr + j] = true; +                  if
> (type_sz(inst->src[i].type) == 8) { +
> is_live_64bit[constant_nr + j] = true; +                  } +
> } } } } @@ -2090,43 +2139,46 @@
> fs_visitor::assign_constant_locations() pull_constant_loc =
> ralloc_array(mem_ctx, int, uniforms);
> 
> int chunk_start = -1; + + +   /* First push 64-bit uniforms */ for
> (unsigned u = 0; u < uniforms; u++) { -      push_constant_loc[u] =
> -1; +      if (!is_live[u] || !is_live_64bit[u]) +
> continue; + pull_constant_loc[u] = -1; +      push_constant_loc[u]
> = -1;
> 
> -      if (!is_live[u]) -         continue; +
> set_push_pull_constant_loc(u, chunk_start, contiguous[u], +
> push_constant_loc, pull_constant_loc, +
> num_push_constants, num_pull_constants, +
> max_push_components, max_chunk_size, +
> stage_prog_data);
> 
> -      /* This is the first live uniform in the chunk */ -      if
> (chunk_start < 0) -         chunk_start = u; +   }
> 
> -      /* If this element does not need to be contiguous with the
> next, we -       * split at this point and everthing between
> chunk_start and u forms a -       * single chunk. -       */ -
> if (!contiguous[u]) { -         unsigned chunk_size = u -
> chunk_start + 1; +   /* Then push the rest of uniforms */ +   for
> (unsigned u = 0; u < uniforms; u++) { +      if (!is_live[u] ||
> is_live_64bit[u]) +         continue;
> 
> -         /* Decide whether we should push or pull this parameter.
> In the -          * Vulkan driver, push constants are explicitly
> exposed via the API -          * so we push everything.  In GL, we
> only push small arrays. -          */ -         if
> (stage_prog_data->pull_param == NULL || -
> (num_push_constants + chunk_size <= max_push_components && -
> chunk_size <= max_chunk_size)) { -
> assert(num_push_constants + chunk_size <= max_push_components); -
> for (unsigned j = chunk_start; j <= u; j++) -
> push_constant_loc[j] = num_push_constants++; -         } else { -
> for (unsigned j = chunk_start; j <= u; j++) -
> pull_constant_loc[j] = num_pull_constants++; -         } +
> pull_constant_loc[u] = -1; +      push_constant_loc[u] = -1;
> 
> -         chunk_start = -1; -      } +
> set_push_pull_constant_loc(u, chunk_start, contiguous[u], +
> push_constant_loc, pull_constant_loc, +
> num_push_constants, num_pull_constants, +
> max_push_components, max_chunk_size, +
> stage_prog_data); }
> 
> +   /* As the uniforms are going to be reordered, take the data
> from a temporal +    * copy of the original param[]. +    */ +
> gl_constant_value **param = rzalloc_array(mem_ctx,
> gl_constant_value*, +
> stage_prog_data->nr_params); +   memcpy(param,
> stage_prog_data->param, +          sizeof(gl_constant_value*) *
> stage_prog_data->nr_params); stage_prog_data->nr_params =
> num_push_constants; stage_prog_data->nr_pull_params =
> num_pull_constants;
> 
> @@ -2139,7 +2191,7 @@ fs_visitor::assign_constant_locations() *
> having to make a copy. */ for (unsigned int i = 0; i < uniforms;
> i++) { -      const gl_constant_value *value =
> stage_prog_data->param[i]; +      const gl_constant_value *value =
> param[i];
> 
> if (pull_constant_loc[i] != -1) { 
> stage_prog_data->pull_param[pull_constant_loc[i]] = value; @@
> -2147,6 +2199,7 @@ fs_visitor::assign_constant_locations() 
> stage_prog_data->param[push_constant_loc[i]] = value; } } +
> ralloc_free(param); }
> 
> /**
> 
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2

iQIcBAEBCAAGBQJXLE5ZAAoJEH/0ujLxfcNDWx8P/ivUcJ+/lMPT/S/V7pD8Fbwc
4BocoIFrU4AwnO3Xn9FYIgG5r0XbbAeRP1Y0NRNRuwA/j7rbAoragCjLzsXflVAN
vXnDw+ZoEO3COdoI4BOvN69N+wyK4Ei44oSIAenDDSVNQwwV2xav83M8oiirDE8N
UVmHmiK0GYOgSAfBmvsMKoSd9YA5B3337plPommaOpNDHNaCmoHw7GCvc/zlDp2n
KVXioGuYDouDfT5NcwSbDR2CW/u0UpzwHQkzA6acOE6qubYx76WrjnHgWvUvbdlQ
S66Okz/Hs3yywhc2Ir7CRN1zYQqk4hpfJ5hZrn4AcWpjvuWdKYhW7ap3RnONSvbz
1I/Jr2F3++5nxRuelf0/BNAvwcnHyHolCXi16emdJHf6LrU7C9KwC6QpcH15dHvt
5+KaSM2fAl/y5uwqjvO++IHdmkW8z7FGSjCh203IFP+fQMld+vEPheLUpiL+3H9u
pQVG0EuJFBZ5fzymgALqF/nCbGiGhSuwUo7Q3hX6YtMsrBzHDZ/ahFxCa0Jp+lYd
8kmGpGpfUYG5cWCaGj5dGFwP1YNtogWOgMmmqqod39pSe3ztYZs1uWFjJg2Y+FOc
8ycLTiwproIR1P9RuUPKjsD///oiWXYT+bOzhCW71fkgQL9LB02yp45iIlNjjm7y
hDrNq6wZZbroS+bxX6Er
=P1yR
-----END PGP SIGNATURE-----


More information about the mesa-dev mailing list