[Mesa-dev] [PATCH 2/2] gallivm: only fetch pointers to constant buffers once

Jose Fonseca jfonseca at vmware.com
Wed May 14 04:43:41 PDT 2014



----- Original Message -----
> From: Roland Scheidegger <sroland at vmware.com>
> 
> In 1d35f77228ad540a551a8e09e062b764a6e31f5e support for multiple constant
> buffers was introduced. This meant we had another indirection, and we did
> resolve the indirection for each constant buffer access. This looks very
> reasonable since llvm can figure out if it's the same pointer, however it
> turns out that this can cause llvm compilation time to go through the roof
> and beyond (I've seen cases in excess of factor 100, e.g. from 50 ms to more
> than 10 seconds (!)), with all the additional time spent in IR optimization
> passes (and in the end all of it in DominatorTree::dominate()).
> I've been unable to narrow it down a bit more (only some shaders seem
> affected,
> seemingly without much correlation to overall shader complexity or constant
> usage) but it is easily avoidable by doing the buffer lookups themeselves
> just
> once (at constant buffer declaration time).
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi.h     |   2 +
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 100
>  +++++++++++++++---------
>  2 files changed, 65 insertions(+), 37 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> index ffd6e87..88ac3c9 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> @@ -437,6 +437,8 @@ struct lp_build_tgsi_soa_context
>  
>     LLVMValueRef consts_ptr;
>     LLVMValueRef const_sizes_ptr;
> +   LLVMValueRef consts[LP_MAX_TGSI_CONST_BUFFERS];
> +   LLVMValueRef consts_sizes[LP_MAX_TGSI_CONST_BUFFERS];
>     const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
>     LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS];
>  
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 2b47fc2..0eaa020 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -1213,7 +1213,6 @@ emit_fetch_constant(
>     LLVMBuilderRef builder = gallivm->builder;
>     struct lp_build_context *uint_bld = &bld_base->uint_bld;
>     unsigned dimension = 0;
> -   LLVMValueRef dimension_index;
>     LLVMValueRef consts_ptr;
>     LLVMValueRef num_consts;
>     LLVMValueRef res;
> @@ -1227,11 +1226,8 @@ emit_fetch_constant(
>        assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
>     }
>  
> -   dimension_index = lp_build_const_int32(gallivm, dimension);
> -   consts_ptr =
> -      lp_build_array_get(gallivm, bld->consts_ptr, dimension_index);
> -   num_consts =
> -      lp_build_array_get(gallivm, bld->const_sizes_ptr, dimension_index);
> +   consts_ptr = bld->consts[dimension];
> +   num_consts = bld->consts_sizes[dimension];
>  
>     if (reg->Register.Indirect) {
>        LLVMValueRef indirect_index;
> @@ -2677,56 +2673,86 @@ lp_emit_declaration_soa(
>     const unsigned last = decl->Range.Last;
>     unsigned idx, i;
>  
> -   for (idx = first; idx <= last; ++idx) {
> -      assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
> -      switch (decl->Declaration.File) {
> -      case TGSI_FILE_TEMPORARY:
> -         if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
> -            assert(idx < LP_MAX_INLINED_TEMPS);
> +   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
> +
> +   switch (decl->Declaration.File) {
> +   case TGSI_FILE_TEMPORARY:
> +      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
> +         assert(last < LP_MAX_INLINED_TEMPS);
> +         for (idx = first; idx <= last; ++idx) {
>              for (i = 0; i < TGSI_NUM_CHANNELS; i++)
>                 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type,
>                 "temp");
>           }
> -         break;
> +      }
> +      break;
>  
> -      case TGSI_FILE_OUTPUT:
> -         if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
> +   case TGSI_FILE_OUTPUT:
> +      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
> +         for (idx = first; idx <= last; ++idx) {
>              for (i = 0; i < TGSI_NUM_CHANNELS; i++)
>                 bld->outputs[idx][i] = lp_build_alloca(gallivm,
>                                                        vec_type, "output");
>           }
> -         break;
> +      }
> +      break;
>  
> -      case TGSI_FILE_ADDRESS:
> -	 /* ADDR registers are only allocated with an integer LLVM IR type,
> -	  * as they are guaranteed to always have integers.
> -	  * XXX: Not sure if this exception is worthwhile (or the whole idea of
> -	  * an ADDR register for that matter).
> -	  */
> +   case TGSI_FILE_ADDRESS:
> +      /* ADDR registers are only allocated with an integer LLVM IR type,
> +       * as they are guaranteed to always have integers.
> +       * XXX: Not sure if this exception is worthwhile (or the whole idea of
> +       * an ADDR register for that matter).
> +       */
> +      assert(last < LP_MAX_TGSI_ADDRS);
> +      for (idx = first; idx <= last; ++idx) {
>           assert(idx < LP_MAX_TGSI_ADDRS);
>           for (i = 0; i < TGSI_NUM_CHANNELS; i++)
>              bld->addr[idx][i] = lp_build_alloca(gallivm,
>              bld_base->base.int_vec_type, "addr");
> -         break;
> +      }
> +      break;
>  
> -      case TGSI_FILE_PREDICATE:
> -         assert(idx < LP_MAX_TGSI_PREDS);
> +   case TGSI_FILE_PREDICATE:
> +      assert(last < LP_MAX_TGSI_PREDS);
> +      for (idx = first; idx <= last; ++idx) {
>           for (i = 0; i < TGSI_NUM_CHANNELS; i++)
>              bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
>                                                   "predicate");
> -         break;
> +      }
> +      break;
>  
> -      case TGSI_FILE_SAMPLER_VIEW:
> -         /*
> -          * The target stored here MUST match whatever there actually
> -          * is in the set sampler views (what about return type?).
> -          */
> -         assert(idx < PIPE_MAX_SHADER_SAMPLER_VIEWS);
> +   case TGSI_FILE_SAMPLER_VIEW:
> +      /*
> +       * The target stored here MUST match whatever there actually
> +       * is in the set sampler views (what about return type?).
> +       */
> +      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
> +      for (idx = first; idx <= last; ++idx) {
>           bld->sv[idx] = decl->SamplerView;
> -         break;
> -
> -      default:
> -         /* don't need to declare other vars */
> -         break;
>        }
> +      break;
> +
> +   case TGSI_FILE_CONSTANT:
> +   {
> +      /*
> +       * We could trivially fetch the per-buffer pointer when fetching the
> +       * constant, relying on llvm to figure out it's always the same
> pointer
> +       * anyway. However, doing so results in a huge (more than factor of
> 10)
> +       * slowdown in llvm compilation times for some (but not all) shaders
> +       * (more specifically, the IR optimization spends way more time in
> +       * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
> +       */
> +      unsigned idx2D = decl->Dim.Index2D;
> +      LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
> +      assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
> +      bld->consts[idx2D] =
> +         lp_build_array_get(gallivm, bld->consts_ptr, index2D);
> +      bld->consts_sizes[idx2D] =
> +         lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
> +   }
> +      break;
> +
> +   default:
> +      /* don't need to declare other vars */
> +      break;
>     }
>  }

Looks good.

Reviewed-by: Jose Fonseca <jfonseca at vmware.com>

I wonder if there are other bits from the state that we should putting in variables at the top code block. 

Jose


More information about the mesa-dev mailing list