[Mesa-dev] [PATCH 4/7] ac: add support for explicit component packing

Timothy Arceri tarceri at itsqueeze.com
Mon Oct 23 00:10:45 UTC 2017


This is needed for RADV to support explicit component packing.

This is also required to use the new NIR component splitting /
packing passes.
---
 src/amd/common/ac_nir_to_llvm.c | 57 +++++++++++++++++++++++++++++++++--------
 1 file changed, 46 insertions(+), 11 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 5e5a46a21f..2ca0d487d0 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1060,21 +1060,20 @@ static int get_llvm_num_components(LLVMValueRef value)
 	                              : 1;
 	return num_components;
 }
 
 static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac,
 				      LLVMValueRef value,
 				      int index)
 {
 	int count = get_llvm_num_components(value);
 
-	assert(index < count);
 	if (count == 1)
 		return value;
 
 	return LLVMBuildExtractElement(ac->builder, value,
 				       LLVMConstInt(ac->i32, index, false), "");
 }
 
 static LLVMValueRef trim_vector(struct ac_llvm_context *ctx,
                                 LLVMValueRef value, unsigned count)
 {
@@ -2810,20 +2809,43 @@ get_dw_address(struct nir_to_llvm_context *ctx,
 	dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
 			       LLVMConstInt(ctx->i32, param * 4, false), "");
 
 	if (const_index && compact_const_index)
 		dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
 				       LLVMConstInt(ctx->i32, const_index, false), "");
 	return dw_addr;
 }
 
 static LLVMValueRef
+build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
+			    unsigned value_count, unsigned component)
+{
+	LLVMValueRef vec = NULL;
+	unsigned value_stride = 1;
+
+	if (value_count == 1) {
+		return values[component];
+	} else if (!value_count)
+		unreachable("value_count is 0");
+
+	for (unsigned i = component; i < value_count + component; i++) {
+		LLVMValueRef value = values[i * value_stride];
+
+		if (!i)
+			vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
+		LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
+		vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, "");
+	}
+	return vec;
+}
+
+static LLVMValueRef
 load_tcs_input(struct nir_to_llvm_context *ctx,
 	       nir_intrinsic_instr *instr)
 {
 	LLVMValueRef dw_addr, stride;
 	unsigned const_index;
 	LLVMValueRef vertex_index;
 	LLVMValueRef indir_index;
 	unsigned param;
 	LLVMValueRef value[4], result;
 	const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
@@ -2831,26 +2853,27 @@ load_tcs_input(struct nir_to_llvm_context *ctx,
 	param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
 	get_deref_offset(ctx->nir, instr->variables[0],
 			 false, NULL, per_vertex ? &vertex_index : NULL,
 			 &const_index, &indir_index);
 
 	stride = unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8);
 	dw_addr = get_tcs_in_current_patch_offset(ctx);
 	dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
 				 indir_index);
 
-	for (unsigned i = 0; i < instr->num_components; i++) {
+	unsigned comp = instr->variables[0]->var->data.location_frac;
+	for (unsigned i = 0; i < instr->num_components + comp; i++) {
 		value[i] = lds_load(ctx, dw_addr);
 		dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
 				       ctx->i32one, "");
 	}
-	result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
+	result = build_varying_gather_values(&ctx->ac, value, instr->num_components, comp);
 	result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
 	return result;
 }
 
 static LLVMValueRef
 load_tcs_output(struct nir_to_llvm_context *ctx,
 	       nir_intrinsic_instr *instr)
 {
 	LLVMValueRef dw_addr;
 	LLVMValueRef stride = NULL;
@@ -2869,26 +2892,27 @@ load_tcs_output(struct nir_to_llvm_context *ctx,
 	if (!instr->variables[0]->var->data.patch) {
 		stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
 		dw_addr = get_tcs_out_current_patch_offset(ctx);
 	} else {
 		dw_addr = get_tcs_out_current_patch_data_offset(ctx);
 	}
 
 	dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
 				 indir_index);
 
-	for (unsigned i = 0; i < instr->num_components; i++) {
+	unsigned comp = instr->variables[0]->var->data.location_frac;
+	for (unsigned i = comp; i < instr->num_components + comp; i++) {
 		value[i] = lds_load(ctx, dw_addr);
 		dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
 				       ctx->i32one, "");
 	}
-	result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
+	result = build_varying_gather_values(&ctx->ac, value, instr->num_components, comp);
 	result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
 	return result;
 }
 
 static void
 store_tcs_output(struct nir_to_llvm_context *ctx,
 		 nir_intrinsic_instr *instr,
 		 LLVMValueRef src,
 		 unsigned writemask)
 {
@@ -2971,23 +2995,28 @@ load_tes_input(struct nir_to_llvm_context *ctx,
 
 	get_deref_offset(ctx->nir, instr->variables[0],
 			 false, NULL, per_vertex ? &vertex_index : NULL,
 			 &const_index, &indir_index);
 	param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
 	if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 &&
 	    is_compact && const_index > 3) {
 		const_index -= 3;
 		param++;
 	}
+
+	unsigned comp = instr->variables[0]->var->data.location_frac;
 	buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
 						     is_compact, vertex_index, indir_index);
 
+	LLVMValueRef comp_offset = LLVMConstInt(ctx->i32, comp * 4, false);
+	buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, "");
+
 	result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL,
 				      buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false);
 	result = trim_vector(&ctx->ac, result, instr->num_components);
 	result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
 	return result;
 }
 
 static LLVMValueRef
 load_gs_input(struct nir_to_llvm_context *ctx,
 	      nir_intrinsic_instr *instr)
@@ -3000,21 +3029,23 @@ load_gs_input(struct nir_to_llvm_context *ctx,
 	unsigned vertex_index;
 	get_deref_offset(ctx->nir, instr->variables[0],
 			 false, &vertex_index, NULL,
 			 &const_index, &indir_index);
 	vtx_offset_param = vertex_index;
 	assert(vtx_offset_param < 6);
 	vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param],
 				  LLVMConstInt(ctx->i32, 4, false), "");
 
 	param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
-	for (unsigned i = 0; i < instr->num_components; i++) {
+
+	unsigned comp = instr->variables[0]->var->data.location_frac;
+	for (unsigned i = comp; i < instr->num_components + comp; i++) {
 		if (ctx->ac.chip_class >= GFX9) {
 			LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param];
 			dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
 			                       LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), "");
 			value[i] = lds_load(ctx, dw_addr);
 		} else {
 			args[0] = ctx->esgs_ring;
 			args[1] = vtx_offset;
 			args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false);
 			args[3] = ctx->i32zero;
@@ -3023,21 +3054,21 @@ load_gs_input(struct nir_to_llvm_context *ctx,
 			args[6] = ctx->i32one; /* GLC */
 			args[7] = ctx->i32zero; /* SLC */
 			args[8] = ctx->i32zero; /* TFE */
 
 			value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
 			                              ctx->i32, args, 9,
 			                              AC_FUNC_ATTR_READONLY |
 			                              AC_FUNC_ATTR_LEGACY);
 		}
 	}
-	result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
+	result = build_varying_gather_values(&ctx->ac, value, instr->num_components, comp);
 
 	return result;
 }
 
 static LLVMValueRef
 build_gep_for_deref(struct ac_nir_context *ctx,
 		    nir_deref_var *deref)
 {
 	struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var);
 	assert(entry->data);
@@ -3073,41 +3104,43 @@ build_gep_for_deref(struct ac_nir_context *ctx,
 	}
 	return val;
 }
 
 static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 				   nir_intrinsic_instr *instr)
 {
 	LLVMValueRef values[8];
 	int idx = instr->variables[0]->var->data.driver_location;
 	int ve = instr->dest.ssa.num_components;
+	unsigned comp = instr->variables[0]->var->data.location_frac;
 	LLVMValueRef indir_index;
 	LLVMValueRef ret;
 	unsigned const_index;
 	bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
 	             instr->variables[0]->var->data.mode == nir_var_shader_in;
 	get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
 				      &const_index, &indir_index);
 
 	if (instr->dest.ssa.bit_size == 64)
 		ve *= 2;
 
 	switch (instr->variables[0]->var->data.mode) {
 	case nir_var_shader_in:
 		if (ctx->stage == MESA_SHADER_TESS_CTRL)
 			return load_tcs_input(ctx->nctx, instr);
 		if (ctx->stage == MESA_SHADER_TESS_EVAL)
 			return load_tes_input(ctx->nctx, instr);
 		if (ctx->stage == MESA_SHADER_GEOMETRY) {
 			return load_gs_input(ctx->nctx, instr);
 		}
-		for (unsigned chan = 0; chan < ve; chan++) {
+
+		for (unsigned chan = comp; chan < ve + comp; chan++) {
 			if (indir_index) {
 				unsigned count = glsl_count_attribute_slots(
 						instr->variables[0]->var->type,
 						ctx->stage == MESA_SHADER_VERTEX);
 				count -= chan / 4;
 				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
 						&ctx->ac, ctx->abi->inputs + idx + chan, count,
 						4, false, true);
 
 				values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
@@ -3139,21 +3172,22 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 		LLVMValueRef address = build_gep_for_deref(ctx,
 							   instr->variables[0]);
 		LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
 		return LLVMBuildBitCast(ctx->ac.builder, val,
 					get_def_type(ctx, &instr->dest.ssa),
 					"");
 	}
 	case nir_var_shader_out:
 		if (ctx->stage == MESA_SHADER_TESS_CTRL)
 			return load_tcs_output(ctx->nctx, instr);
-		for (unsigned chan = 0; chan < ve; chan++) {
+
+		for (unsigned chan = comp; chan < ve + comp; chan++) {
 			if (indir_index) {
 				unsigned count = glsl_count_attribute_slots(
 						instr->variables[0]->var->type, false);
 				count -= chan / 4;
 				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
 						&ctx->ac, ctx->outputs + idx + chan, count,
 						4, true, true);
 
 				values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
 								       tmp_vec,
@@ -3161,32 +3195,33 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 			} else {
 				values[chan] = LLVMBuildLoad(ctx->ac.builder,
 						     ctx->outputs[idx + chan + const_index * 4],
 						     "");
 			}
 		}
 		break;
 	default:
 		unreachable("unhandle variable mode");
 	}
-	ret = ac_build_gather_values(&ctx->ac, values, ve);
+	ret = build_varying_gather_values(&ctx->ac, values, ve, comp);
 	return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
 }
 
 static void
 visit_store_var(struct ac_nir_context *ctx,
 		nir_intrinsic_instr *instr)
 {
 	LLVMValueRef temp_ptr, value;
 	int idx = instr->variables[0]->var->data.driver_location;
+	unsigned comp = instr->variables[0]->var->data.location_frac;
 	LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
-	int writemask = instr->const_index[0];
+	int writemask = instr->const_index[0] << comp;
 	LLVMValueRef indir_index;
 	unsigned const_index;
 	get_deref_offset(ctx, instr->variables[0], false,
 		         NULL, NULL, &const_index, &indir_index);
 
 	if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
 		int old_writemask = writemask;
 
 		src = LLVMBuildBitCast(ctx->ac.builder, src,
 		                       LLVMVectorType(ctx->ac.f32, get_llvm_num_components(src) * 2),
-- 
2.13.6



More information about the mesa-dev mailing list