[Mesa-dev] [PATCH 9/9] ac, radeonsi: use ac_build_gather_values more

Samuel Pitoiset samuel.pitoiset at gmail.com
Tue Aug 21 07:17:54 UTC 2018


Patches 3-9 are:

Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

On 8/21/18 5:23 AM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> ---
>   src/amd/common/ac_nir_to_llvm.c               | 14 +++--------
>   src/gallium/drivers/radeonsi/si_shader.c      |  8 +++---
>   .../drivers/radeonsi/si_shader_tgsi_mem.c     | 25 +++++++------------
>   .../drivers/radeonsi/si_shader_tgsi_setup.c   | 17 ++++---------
>   4 files changed, 20 insertions(+), 44 deletions(-)
> 
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 537ac33c044..700e48e14b7 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -479,35 +479,30 @@ static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
>   	comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
>   
>   	return LLVMBuildBitCast(ctx->builder, ac_build_cvt_pkrtz_f16(ctx, comp),
>   				ctx->i32, "");
>   }
>   
>   static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
>   					  LLVMValueRef src0)
>   {
>   	LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
> -	LLVMValueRef temps[2], result, val;
> +	LLVMValueRef temps[2], val;
>   	int i;
>   
>   	for (i = 0; i < 2; i++) {
>   		val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
>   		val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
>   		val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
>   		temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
>   	}
> -
> -	result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
> -					ctx->i32_0, "");
> -	result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
> -					ctx->i32_1, "");
> -	return result;
> +	return ac_build_gather_values(ctx, temps, 2);
>   }
>   
>   static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
>   			      nir_op op,
>   			      LLVMValueRef src0)
>   {
>   	unsigned mask;
>   	int idx;
>   	LLVMValueRef result;
>   
> @@ -997,24 +992,21 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
>   		LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
>   						    ctx->ac.v2i32,
>   						    "");
>   		result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
>   						 ctx->ac.i32_1, "");
>   		break;
>   	}
>   
>   	case nir_op_pack_64_2x32_split: {
>   		LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32);
> -		tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
> -					     src[0], ctx->ac.i32_0, "");
> -		tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
> -					     src[1], ctx->ac.i32_1, "");
> +		tmp = ac_build_gather_values(&ctx->ac, src, 2);
>   		result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
>   		break;
>   	}
>   
>   	case nir_op_cube_face_coord: {
>   		src[0] = ac_to_float(&ctx->ac, src[0]);
>   		LLVMValueRef results[2];
>   		LLVMValueRef in[3];
>   		for (unsigned chan = 0; chan < 3; chan++)
>   			in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 66fe5fad218..cfd99b61601 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2717,26 +2717,24 @@ static void emit_streamout_output(struct si_shader_context *ctx,
>   
>   	/* Pack the output. */
>   	LLVMValueRef vdata = NULL;
>   
>   	switch (num_comps) {
>   	case 1: /* as i32 */
>   		vdata = out[0];
>   		break;
>   	case 2: /* as v2i32 */
>   	case 3: /* as v4i32 (aligned to 4) */
> +		out[3] = LLVMGetUndef(ctx->i32);
> +		/* fall through */
>   	case 4: /* as v4i32 */
> -		vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, util_next_power_of_two(num_comps)));
> -		for (int j = 0; j < num_comps; j++) {
> -			vdata = LLVMBuildInsertElement(ctx->ac.builder, vdata, out[j],
> -						       LLVMConstInt(ctx->i32, j, 0), "");
> -		}
> +		vdata = ac_build_gather_values(&ctx->ac, out, util_next_power_of_two(num_comps));
>   		break;
>   	}
>   
>   	ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx],
>   				    vdata, num_comps,
>   				    so_write_offsets[buf_idx],
>   				    ctx->i32_0,
>   				    stream_out->dst_offset * 4, 1, 1, true, false);
>   }
>   
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> index 54a0413e464..8e0578b4d5e 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> @@ -546,50 +546,43 @@ static void store_emit_buffer(struct si_shader_context *ctx,
>   			      unsigned cache_policy,
>   			      bool writeonly_memory)
>   {
>   	LLVMBuilderRef builder = ctx->ac.builder;
>   	LLVMValueRef base_data = value;
>   	LLVMValueRef base_offset = voffset;
>   
>   	while (writemask) {
>   		int start, count;
>   		const char *intrinsic_name;
> -		LLVMValueRef data, voff, tmp;
> +		LLVMValueRef data, voff;
>   
>   		u_bit_scan_consecutive_range(&writemask, &start, &count);
>   
>   		/* Due to an LLVM limitation, split 3-element writes
>   		 * into a 2-element and a 1-element write. */
>   		if (count == 3) {
>   			writemask |= 1 << (start + 2);
>   			count = 2;
>   		}
>   
>   		if (count == 4) {
>   			data = base_data;
>   			intrinsic_name = "llvm.amdgcn.buffer.store.v4f32";
>   		} else if (count == 2) {
> -			LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);
> -
> -			tmp = LLVMBuildExtractElement(
> -				builder, base_data,
> -				LLVMConstInt(ctx->i32, start, 0), "");
> -			data = LLVMBuildInsertElement(
> -				builder, LLVMGetUndef(v2f32), tmp,
> -				ctx->i32_0, "");
> -
> -			tmp = LLVMBuildExtractElement(
> -				builder, base_data,
> -				LLVMConstInt(ctx->i32, start + 1, 0), "");
> -			data = LLVMBuildInsertElement(
> -				builder, data, tmp, ctx->i32_1, "");
> -
> +			LLVMValueRef values[2] = {
> +				LLVMBuildExtractElement(builder, base_data,
> +							LLVMConstInt(ctx->i32, start, 0), ""),
> +				LLVMBuildExtractElement(builder, base_data,
> +							LLVMConstInt(ctx->i32, start + 1, 0), ""),
> +			};
> +
> +			data = ac_build_gather_values(&ctx->ac, values, 2);
>   			intrinsic_name = "llvm.amdgcn.buffer.store.v2f32";
>   		} else {
>   			assert(count == 1);
>   			data = LLVMBuildExtractElement(
>   				builder, base_data,
>   				LLVMConstInt(ctx->i32, start, 0), "");
>   			intrinsic_name = "llvm.amdgcn.buffer.store.f32";
>   		}
>   
>   		voff = base_offset;
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
> index 1f37b0ba37d..20164939cb7 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
> @@ -298,32 +298,25 @@ get_pointer_into_array(struct si_shader_context *ctx,
>   	return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
>   }
>   
>   LLVMValueRef
>   si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
>   			 LLVMTypeRef type,
>   			 LLVMValueRef ptr,
>   			 LLVMValueRef ptr2)
>   {
>   	struct si_shader_context *ctx = si_shader_context(bld_base);
> -	LLVMValueRef result;
> -
> -	result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
> -
> -	result = LLVMBuildInsertElement(ctx->ac.builder,
> -					result,
> -					ac_to_integer(&ctx->ac, ptr),
> -					ctx->i32_0, "");
> -	result = LLVMBuildInsertElement(ctx->ac.builder,
> -					result,
> -					ac_to_integer(&ctx->ac, ptr2),
> -					ctx->i32_1, "");
> +	LLVMValueRef values[2] = {
> +		ac_to_integer(&ctx->ac, ptr),
> +		ac_to_integer(&ctx->ac, ptr2),
> +	};
> +	LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
>   	return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
>   }
>   
>   static LLVMValueRef
>   emit_array_fetch(struct lp_build_tgsi_context *bld_base,
>   		 unsigned File, enum tgsi_opcode_type type,
>   		 struct tgsi_declaration_range range,
>   		 unsigned swizzle)
>   {
>   	struct si_shader_context *ctx = si_shader_context(bld_base);
> 


More information about the mesa-dev mailing list