[Mesa-dev] [PATCH 2/2] gallivm: only fetch pointers to constant buffers once
Jose Fonseca
jfonseca at vmware.com
Wed May 14 04:43:41 PDT 2014
----- Original Message -----
> From: Roland Scheidegger <sroland at vmware.com>
>
> In 1d35f77228ad540a551a8e09e062b764a6e31f5e support for multiple constant
> buffers was introduced. This meant we had another indirection, and we did
> resolve the indirection for each constant buffer access. This looks very
> reasonable since llvm can figure out if it's the same pointer, however it
> turns out that this can cause llvm compilation time to go through the roof
> and beyond (I've seen cases in excess of factor 100, e.g. from 50 ms to more
> than 10 seconds (!)), with all the additional time spent in IR optimization
> passes (and in the end all of it in DominatorTree::dominate()).
> I've been unable to narrow it down a bit more (only some shaders seem
> affected,
> seemingly without much correlation to overall shader complexity or constant
> usage) but it is easily avoidable by doing the buffer lookups themeselves
> just
> once (at constant buffer declaration time).
> ---
> src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 2 +
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 100
> +++++++++++++++---------
> 2 files changed, 65 insertions(+), 37 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> index ffd6e87..88ac3c9 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> @@ -437,6 +437,8 @@ struct lp_build_tgsi_soa_context
>
> LLVMValueRef consts_ptr;
> LLVMValueRef const_sizes_ptr;
> + LLVMValueRef consts[LP_MAX_TGSI_CONST_BUFFERS];
> + LLVMValueRef consts_sizes[LP_MAX_TGSI_CONST_BUFFERS];
> const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
> LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS];
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 2b47fc2..0eaa020 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -1213,7 +1213,6 @@ emit_fetch_constant(
> LLVMBuilderRef builder = gallivm->builder;
> struct lp_build_context *uint_bld = &bld_base->uint_bld;
> unsigned dimension = 0;
> - LLVMValueRef dimension_index;
> LLVMValueRef consts_ptr;
> LLVMValueRef num_consts;
> LLVMValueRef res;
> @@ -1227,11 +1226,8 @@ emit_fetch_constant(
> assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
> }
>
> - dimension_index = lp_build_const_int32(gallivm, dimension);
> - consts_ptr =
> - lp_build_array_get(gallivm, bld->consts_ptr, dimension_index);
> - num_consts =
> - lp_build_array_get(gallivm, bld->const_sizes_ptr, dimension_index);
> + consts_ptr = bld->consts[dimension];
> + num_consts = bld->consts_sizes[dimension];
>
> if (reg->Register.Indirect) {
> LLVMValueRef indirect_index;
> @@ -2677,56 +2673,86 @@ lp_emit_declaration_soa(
> const unsigned last = decl->Range.Last;
> unsigned idx, i;
>
> - for (idx = first; idx <= last; ++idx) {
> - assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
> - switch (decl->Declaration.File) {
> - case TGSI_FILE_TEMPORARY:
> - if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
> - assert(idx < LP_MAX_INLINED_TEMPS);
> + assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
> +
> + switch (decl->Declaration.File) {
> + case TGSI_FILE_TEMPORARY:
> + if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
> + assert(last < LP_MAX_INLINED_TEMPS);
> + for (idx = first; idx <= last; ++idx) {
> for (i = 0; i < TGSI_NUM_CHANNELS; i++)
> bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type,
> "temp");
> }
> - break;
> + }
> + break;
>
> - case TGSI_FILE_OUTPUT:
> - if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
> + case TGSI_FILE_OUTPUT:
> + if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
> + for (idx = first; idx <= last; ++idx) {
> for (i = 0; i < TGSI_NUM_CHANNELS; i++)
> bld->outputs[idx][i] = lp_build_alloca(gallivm,
> vec_type, "output");
> }
> - break;
> + }
> + break;
>
> - case TGSI_FILE_ADDRESS:
> - /* ADDR registers are only allocated with an integer LLVM IR type,
> - * as they are guaranteed to always have integers.
> - * XXX: Not sure if this exception is worthwhile (or the whole idea of
> - * an ADDR register for that matter).
> - */
> + case TGSI_FILE_ADDRESS:
> + /* ADDR registers are only allocated with an integer LLVM IR type,
> + * as they are guaranteed to always have integers.
> + * XXX: Not sure if this exception is worthwhile (or the whole idea of
> + * an ADDR register for that matter).
> + */
> + assert(last < LP_MAX_TGSI_ADDRS);
> + for (idx = first; idx <= last; ++idx) {
> assert(idx < LP_MAX_TGSI_ADDRS);
> for (i = 0; i < TGSI_NUM_CHANNELS; i++)
> bld->addr[idx][i] = lp_build_alloca(gallivm,
> bld_base->base.int_vec_type, "addr");
> - break;
> + }
> + break;
>
> - case TGSI_FILE_PREDICATE:
> - assert(idx < LP_MAX_TGSI_PREDS);
> + case TGSI_FILE_PREDICATE:
> + assert(last < LP_MAX_TGSI_PREDS);
> + for (idx = first; idx <= last; ++idx) {
> for (i = 0; i < TGSI_NUM_CHANNELS; i++)
> bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
> "predicate");
> - break;
> + }
> + break;
>
> - case TGSI_FILE_SAMPLER_VIEW:
> - /*
> - * The target stored here MUST match whatever there actually
> - * is in the set sampler views (what about return type?).
> - */
> - assert(idx < PIPE_MAX_SHADER_SAMPLER_VIEWS);
> + case TGSI_FILE_SAMPLER_VIEW:
> + /*
> + * The target stored here MUST match whatever there actually
> + * is in the set sampler views (what about return type?).
> + */
> + assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
> + for (idx = first; idx <= last; ++idx) {
> bld->sv[idx] = decl->SamplerView;
> - break;
> -
> - default:
> - /* don't need to declare other vars */
> - break;
> }
> + break;
> +
> + case TGSI_FILE_CONSTANT:
> + {
> + /*
> + * We could trivially fetch the per-buffer pointer when fetching the
> + * constant, relying on llvm to figure out it's always the same
> pointer
> + * anyway. However, doing so results in a huge (more than factor of
> 10)
> + * slowdown in llvm compilation times for some (but not all) shaders
> + * (more specifically, the IR optimization spends way more time in
> + * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
> + */
> + unsigned idx2D = decl->Dim.Index2D;
> + LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
> + assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
> + bld->consts[idx2D] =
> + lp_build_array_get(gallivm, bld->consts_ptr, index2D);
> + bld->consts_sizes[idx2D] =
> + lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
> + }
> + break;
> +
> + default:
> + /* don't need to declare other vars */
> + break;
> }
> }
Looks good.
Reviewed-by: Jose Fonseca <jfonseca at vmware.com>
I wonder if there are other bits from the state that we should putting in variables at the top code block.
Jose
More information about the mesa-dev
mailing list