[Mesa-dev] [PATCH 02/14] radeonsi: stop using v16i8

Marek Olšák maraeo at gmail.com
Fri Apr 28 21:42:39 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/amd/common/ac_llvm_build.c                      |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c            | 18 ++++++++----------
 src/gallium/drivers/radeonsi/si_shader_internal.h   |  1 -
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c |  1 -
 4 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 209dfdd..171016b 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -752,21 +752,21 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
 					  ctx->v4f32, args, ARRAY_SIZE(args),
 					  /* READNONE means writes can't
 					   * affect it, while READONLY means
 					   * that writes can affect it. */
 					  readonly_memory && HAVE_LLVM >= 0x0400 ?
 						  AC_FUNC_ATTR_READNONE :
 						  AC_FUNC_ATTR_READONLY);
 	}
 
 	LLVMValueRef args[] = {
-		rsrc,
+		LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""),
 		voffset,
 		vindex,
 	};
 	return ac_build_intrinsic(ctx, "llvm.SI.vs.load.input",
 				  ctx->v4f32, args, 3,
 				  AC_FUNC_ATTR_READNONE |
 				  AC_FUNC_ATTR_LEGACY);
 }
 
 /**
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 77dd6b1..3ac1ef4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1378,21 +1378,21 @@ static LLVMValueRef get_sample_id(struct si_shader_context *ctx)
 /**
  * Load a dword from a constant buffer.
  */
 static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
 				      LLVMValueRef resource,
 				      LLVMValueRef offset)
 {
 	LLVMBuilderRef builder = ctx->gallivm.builder;
 	LLVMValueRef args[2] = {resource, offset};
 
-	return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2,
+	return lp_build_intrinsic(builder, "llvm.SI.load.const.v4i32", ctx->f32, args, 2,
 				  LP_FUNC_ATTR_READNONE |
 				  LP_FUNC_ATTR_LEGACY);
 }
 
 static LLVMValueRef load_sample_position(struct si_shader_context *ctx, LLVMValueRef sample_id)
 {
 	struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld;
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
 	LLVMValueRef desc = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
@@ -4666,22 +4666,21 @@ static void tex_fetch_args(
 	unsigned chan;
 	unsigned num_deriv_channels = 0;
 	bool has_offset = inst->Texture.NumOffsets > 0;
 	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
 	unsigned dmask = 0xf;
 
 	tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
 
 	if (target == TGSI_TEXTURE_BUFFER) {
 		emit_data->dst_type = ctx->v4f32;
-		emit_data->args[0] = LLVMBuildBitCast(gallivm->builder, res_ptr,
-						      ctx->v16i8, "");
+		emit_data->args[0] = res_ptr;
 		emit_data->args[1] = ctx->i32_0;
 		emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
 		emit_data->arg_count = 3;
 		return;
 	}
 
 	/* Fetch and project texture coordinates */
 	coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
 	for (chan = 0; chan < 3; chan++ ) {
 		coords[chan] = lp_build_emit_fetch(bld_base,
@@ -5835,48 +5834,48 @@ static unsigned si_get_max_workgroup_size(struct si_shader *shader)
 		max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
 	}
 	return max_work_group_size;
 }
 
 static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
 					    LLVMTypeRef *params,
 					    unsigned *num_params,
 					    bool assign_params)
 {
-	params[(*num_params)++] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
+	params[(*num_params)++] = const_array(ctx->v4i32, SI_NUM_CONST_BUFFERS);
 	params[(*num_params)++] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
 	params[(*num_params)++] = const_array(ctx->v8i32, SI_NUM_IMAGES);
 	params[(*num_params)++] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
 
 	if (assign_params) {
 		ctx->param_const_buffers  = *num_params - 4;
 		ctx->param_samplers	  = *num_params - 3;
 		ctx->param_images	  = *num_params - 2;
 		ctx->param_shader_buffers = *num_params - 1;
 	}
 }
 
 static void declare_default_desc_pointers(struct si_shader_context *ctx,
 					  LLVMTypeRef *params,
 				          unsigned *num_params)
 {
 	params[ctx->param_rw_buffers = (*num_params)++] =
-		const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
+		const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
 	declare_per_stage_desc_pointers(ctx, params, num_params, true);
 }
 
 static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
 					    LLVMTypeRef *params,
 					    unsigned *num_params)
 {
 	params[ctx->param_vertex_buffers = (*num_params)++] =
-		const_array(ctx->v16i8, SI_NUM_VERTEX_BUFFERS);
+		const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS);
 	params[ctx->param_base_vertex = (*num_params)++] = ctx->i32;
 	params[ctx->param_start_instance = (*num_params)++] = ctx->i32;
 	params[ctx->param_draw_id = (*num_params)++] = ctx->i32;
 	params[ctx->param_vs_state_bits = (*num_params)++] = ctx->i32;
 }
 
 static void declare_vs_input_vgprs(struct si_shader_context *ctx,
 				   LLVMTypeRef *params, unsigned *num_params,
 				   unsigned *num_prolog_vgprs)
 {
@@ -5984,21 +5983,21 @@ static void create_function(struct si_shader_context *ctx)
 		 */
 		for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
 			returns[num_returns++] = ctx->i32; /* SGPRs */
 		for (i = 0; i < 3; i++)
 			returns[num_returns++] = ctx->f32; /* VGPRs */
 		break;
 
 	case SI_SHADER_MERGED_VERTEX_TESSCTRL:
 		/* Merged stages have 8 system SGPRs at the beginning. */
 		params[ctx->param_rw_buffers = num_params++] = /* SPI_SHADER_USER_DATA_ADDR_LO_HS */
-			const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
+			const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
 		params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
 		params[ctx->param_merged_wave_info = num_params++] = ctx->i32;
 		params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
 		params[ctx->param_merged_scratch_offset = num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32; /* unused */
 		params[num_params++] = ctx->i32; /* unused */
 
 		params[num_params++] = ctx->i32; /* unused */
 		params[num_params++] = ctx->i32; /* unused */
 		declare_per_stage_desc_pointers(ctx, params, &num_params,
@@ -6039,21 +6038,21 @@ static void create_function(struct si_shader_context *ctx)
 			for (i = 0; i <= 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K; i++)
 				returns[num_returns++] = ctx->i32; /* SGPRs */
 			for (i = 0; i < 3; i++)
 				returns[num_returns++] = ctx->f32; /* VGPRs */
 		}
 		break;
 
 	case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
 		/* Merged stages have 8 system SGPRs at the beginning. */
 		params[ctx->param_rw_buffers = num_params++] = /* SPI_SHADER_USER_DATA_ADDR_LO_GS */
-			const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
+			const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
 		params[ctx->param_gs2vs_offset = num_params++] = ctx->i32;
 		params[ctx->param_merged_wave_info = num_params++] = ctx->i32;
 		params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
 		params[ctx->param_merged_scratch_offset = num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32; /* unused (SPI_SHADER_PGM_LO/HI_GS << 8) */
 		params[num_params++] = ctx->i32; /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */
 
 		params[num_params++] = ctx->i32; /* unused */
 		params[num_params++] = ctx->i32; /* unused */
 		declare_per_stage_desc_pointers(ctx, params, &num_params,
@@ -6358,21 +6357,20 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
 					     S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
 					     S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
 					     S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
 					     S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
 					     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
 					     S_008F0C_ELEMENT_SIZE(1) | /* element_size = 4 (bytes) */
 					     S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */
 					     S_008F0C_ADD_TID_ENABLE(1),
 					     0),
 				LLVMConstInt(ctx->i32, 3, 0), "");
-			ring = LLVMBuildBitCast(builder, ring, ctx->v16i8, "");
 
 			ctx->gsvs_ring[stream] = ring;
 		}
 	}
 }
 
 static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
 					 LLVMValueRef param_rw_buffers,
 					 unsigned param_pos_fixed_pt)
 {
@@ -8691,21 +8689,21 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
 		unsigned pos = key->ps_prolog.num_input_sgprs +
 			       key->ps_prolog.num_input_vgprs - 1;
 		LLVMValueRef ptr[2], list;
 
 		/* Get the pointer to rw buffers. */
 		ptr[0] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS);
 		ptr[1] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS_HI);
 		list = lp_build_gather_values(gallivm, ptr, 2);
 		list = LLVMBuildBitCast(gallivm->builder, list, ctx->i64, "");
 		list = LLVMBuildIntToPtr(gallivm->builder, list,
-					  const_array(ctx->v16i8, SI_NUM_RW_BUFFERS), "");
+					  const_array(ctx->v4i32, SI_NUM_RW_BUFFERS), "");
 
 		si_llvm_emit_polygon_stipple(ctx, list, pos);
 	}
 
 	if (key->ps_prolog.states.bc_optimize_for_persp ||
 	    key->ps_prolog.states.bc_optimize_for_linear) {
 		unsigned i, base = key->ps_prolog.num_input_sgprs;
 		LLVMValueRef center[2], centroid[2], tmp, bc_optimize;
 
 		/* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER;
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index cad2db3..03bf83d 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -215,21 +215,20 @@ struct si_shader_context {
 	LLVMValueRef gs_next_vertex[4];
 	LLVMValueRef return_value;
 
 	LLVMTypeRef voidt;
 	LLVMTypeRef i1;
 	LLVMTypeRef i8;
 	LLVMTypeRef i32;
 	LLVMTypeRef i64;
 	LLVMTypeRef i128;
 	LLVMTypeRef f32;
-	LLVMTypeRef v16i8;
 	LLVMTypeRef v2i32;
 	LLVMTypeRef v4i32;
 	LLVMTypeRef v4f32;
 	LLVMTypeRef v8i32;
 
 	LLVMValueRef i32_0;
 	LLVMValueRef i32_1;
 
 	LLVMValueRef shared_memory;
 };
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index c733f5a..66b1916 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1334,21 +1334,20 @@ void si_llvm_context_init(struct si_shader_context *ctx,
 
 	si_shader_context_init_alu(&ctx->bld_base);
 
 	ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
 	ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
 	ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
 	ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
 	ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
 	ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
 	ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
-	ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
 	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
 	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
 	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
 	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
 
 	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
 	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
 }
 
 /* Set the context to a certain TGSI shader. Can be called repeatedly
-- 
2.7.4



More information about the mesa-dev mailing list