[Mesa-dev] [PATCH] gallivm: Use alloca_undef with array type instead of alloca_array
Jose Fonseca
jfonseca at vmware.com
Tue May 15 08:30:28 UTC 2018
On 15/05/18 03:51, sroland at vmware.com wrote:
> From: Roland Scheidegger <sroland at vmware.com>
>
> Use a single allocation of array type instead of the old-style array
> allocation for the temp and immediate arrays.
> Probably only makes a difference if they aren't used indirectly (so,
> if we used them solely because there's too many temps or immediates).
> In this case the sroa and early-cse passes can sometimes do some
> optimizations which they otherwise cannot.
> (As a side note, for the temp reg array, we actually really should
> use one allocation per array id, not just one for everything.)
> Note that the instcombine pass would actually promote such
> allocations to single alloc of array type as well, but it's too late
> for some artificial shaders we've seen to help (we don't want to run
> instcombine at the beginning due to its cost, hence would need
> another sroa/cse pass after instcombine). sroa/early-cse help there
> because they can actually eliminate all of the huge shader, reducing
> it to a single const output (don't ask...).
> (Interestingly, instcombine also removes all the bitcasts we do on that
> allocation for single-value gathering, and in the end directly indexes
> into the single vector elements, which according to spec is only
> semi-valid, but this happens regardless. Another thing instcombine also
> does is use inbound GEPs, which is probably something we should do
> manually as well - for indirectly indexed reg files llvm may not be
> able to figure it out on its own, but we should be able to guarantee
> all pointers are always inbound. In any case, by the looks of it
> using single allocation with array type seems to be the right thing
> to do even for ordinary shaders.)
> ---
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 61 +++++++++++++------------
> 1 file changed, 33 insertions(+), 28 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index e411f90..83d7dbe 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -741,7 +741,8 @@ static void lp_exec_mask_store(struct lp_exec_mask *mask,
>
> assert(lp_check_value(bld_store->type, val));
> assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
> - assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));
> + assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val) ||
> + LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(dst_ptr))) == LLVMArrayTypeKind);
>
> if (exec_mask) {
> LLVMValueRef res, dst;
> @@ -852,7 +853,14 @@ get_file_ptr(struct lp_build_tgsi_soa_context *bld,
>
> if (bld->indirect_files & (1 << file)) {
> LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
> - return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
> + if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
> + LLVMValueRef gep[2];
> + gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
> + gep[1] = lindex;
> + return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
> + } else {
> + return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
> + }
> }
> else {
> assert(index <= bld->bld_base.info->file_max[file]);
> @@ -1352,21 +1360,20 @@ emit_fetch_immediate(
> /* Gather values from the immediate register array */
> res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
> } else {
> - LLVMValueRef lindex = lp_build_const_int32(gallivm,
> - reg->Register.Index * 4 + swizzle);
> - LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
> - bld->imms_array, &lindex, 1, "");
> + LLVMValueRef gep[2];
> + gep[0] = lp_build_const_int32(gallivm, 0);
> + gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
> + LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
> + bld->imms_array, gep, 2, "");
> res = LLVMBuildLoad(builder, imms_ptr, "");
>
> if (tgsi_type_is_64bit(stype)) {
> - LLVMValueRef lindex1;
> LLVMValueRef imms_ptr2;
> LLVMValueRef res2;
> -
> - lindex1 = lp_build_const_int32(gallivm,
> - reg->Register.Index * 4 + swizzle + 1);
> + gep[1] = lp_build_const_int32(gallivm,
> + reg->Register.Index * 4 + swizzle + 1);
> imms_ptr2 = LLVMBuildGEP(builder,
> - bld->imms_array, &lindex1, 1, "");
> + bld->imms_array, gep, 2, "");
> res2 = LLVMBuildLoad(builder, imms_ptr2, "");
> res = emit_fetch_64bit(bld_base, stype, res, res2);
> }
> @@ -2957,13 +2964,14 @@ void lp_emit_immediate_soa(
> unsigned index = bld->num_immediates;
> struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> + LLVMValueRef gep[2];
> + gep[0] = lp_build_const_int32(gallivm, 0);
>
> assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
> for (i = 0; i < 4; ++i ) {
> - LLVMValueRef lindex = lp_build_const_int32(
> - bld->bld_base.base.gallivm, index * 4 + i);
> + gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
> LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
> - bld->imms_array, &lindex, 1, "");
> + bld->imms_array, gep, 2, "");
> LLVMBuildStore(builder, imms[i], imm_ptr);
> }
> } else {
> @@ -2979,11 +2987,12 @@ void lp_emit_immediate_soa(
> unsigned index = bld->num_immediates;
> struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> + LLVMValueRef gep[2];
> + gep[0] = lp_build_const_int32(gallivm, 0);
> for (i = 0; i < 4; ++i ) {
> - LLVMValueRef lindex = lp_build_const_int32(
> - bld->bld_base.base.gallivm, index * 4 + i);
> + gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
> LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
> - bld->imms_array, &lindex, 1, "");
> + bld->imms_array, gep, 2, "");
> LLVMBuildStore(builder,
> bld->immediates[index][i],
> imm_ptr);
> @@ -3649,12 +3658,10 @@ static void emit_prologue(struct lp_build_tgsi_context * bld_base)
> struct gallivm_state * gallivm = bld_base->base.gallivm;
>
> if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
> - LLVMValueRef array_size =
> - lp_build_const_int32(gallivm,
> - bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
> - bld->temps_array = lp_build_array_alloca(gallivm,
> - bld_base->base.vec_type, array_size,
> - "temp_array");
> + unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
> + bld->temps_array = lp_build_alloca_undef(gallivm,
> + LLVMArrayType(bld_base->base.vec_type, array_size),
> + "temp_array");
> }
>
> if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
> @@ -3667,11 +3674,9 @@ static void emit_prologue(struct lp_build_tgsi_context * bld_base)
> }
>
> if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
> - LLVMValueRef array_size =
> - lp_build_const_int32(gallivm,
> - bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
> - bld->imms_array = lp_build_array_alloca(gallivm,
> - bld_base->base.vec_type, array_size,
> + unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
> + bld->imms_array = lp_build_alloca_undef(gallivm,
> + LLVMArrayType(bld_base->base.vec_type, array_size),
> "imms_array");
> }
>
>
Looks good to me. I suppose there was no regressions with, e.g., piglit.
Reviewed-by: Jose Fonseca <jfonseca at vmware.com>
More information about the mesa-dev
mailing list