[Mesa-dev] [PATCH 6/7] radv: add support for 16bit input/output

Daniel Schürmann daniel.schuermann at campus.tu-berlin.de
Wed Feb 7 18:48:00 UTC 2018


Signed-off-by: Daniel Schürmann <daniel.schuermann at campus.tu-berlin.de>
---
 src/amd/common/ac_nir_to_llvm.c | 92 ++++++++++++++++++++++++++++++++---------
 1 file changed, 72 insertions(+), 20 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index d38a649074..842a846df1 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3033,6 +3033,8 @@ store_tcs_output(struct ac_shader_abi *abi,
 		if (!(writemask & (1 << chan)))
 			continue;
 		LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
+		value = ac_to_integer(&ctx->ac, value);
+		value = LLVMBuildZExtOrBitCast(ctx->ac.builder, value, ctx->ac.i32, "");
 
 		if (store_lds || is_tess_factor) {
 			LLVMValueRef dw_addr_chan =
@@ -3129,8 +3131,13 @@ load_gs_input(struct ac_shader_abi *abi,
 							vtx_offset, soffset,
 							0, 1, 0, true, false);
 
-			value[i] = LLVMBuildBitCast(ctx->builder, value[i],
-						    type, "");
+
+
+			if (ac_get_type_size(type) == 2) {
+				value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], ctx->ac.i32, "");
+				value[i] = LLVMBuildTrunc(ctx->ac.builder, value[i], ctx->ac.i16, "");
+			}
+			value[i] = LLVMBuildBitCast(ctx->builder, value[i], type, "");
 		}
 	}
 	result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
@@ -3199,6 +3206,10 @@ static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx,
 					      instr->variables[0]->var->data.location_frac,
 					      instr->num_components,
 					      is_patch, is_compact, load_inputs);
+	if (instr->dest.ssa.bit_size == 16) {
+		result = ac_to_integer(&ctx->ac, result);
+		result = LLVMBuildTrunc(ctx->ac.builder, result, get_def_type(ctx, &instr->dest.ssa), "");
+	}
 	return LLVMBuildBitCast(ctx->ac.builder, result, get_def_type(ctx, &instr->dest.ssa), "");
 }
 
@@ -4258,9 +4269,8 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr
 			LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false);
 			voffset = LLVMBuildAdd(ctx->builder, voffset, gs_next_vertex, "");
 			voffset = LLVMBuildMul(ctx->builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
-
-			out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->ac.i32, "");
-
+			out_val = ac_to_integer (&ctx->ac, out_val);
+			out_val = LLVMBuildZExtOrBitCast(ctx->builder, out_val, ctx->ac.i32, "");
 			ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring,
 						    out_val, 1,
 						    voffset, ctx->gs2vs_offset, 0,
@@ -5382,6 +5392,7 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,
 
 	variable->data.driver_location = idx * 4;
 
+	enum glsl_base_type type = glsl_get_base_type(variable->type);
 	for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
 		if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) {
 			buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.instance_id,
@@ -5407,9 +5418,16 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,
 
 		for (unsigned chan = 0; chan < 4; chan++) {
 			LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
-			ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
-				ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->builder,
-							input, llvm_chan, ""));
+			LLVMValueRef tmp = LLVMBuildExtractElement(ctx->builder, input, llvm_chan, "");
+
+			if (type == GLSL_TYPE_FLOAT16) {
+				tmp = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.f32, "");
+				tmp = LLVMBuildFPTrunc(ctx->ac.builder, tmp, ctx->ac.f16, "");
+			}
+			tmp = ac_to_integer(&ctx->ac, tmp);
+			if (type == GLSL_TYPE_UINT16 || type == GLSL_TYPE_INT16)
+				tmp = LLVMBuildTrunc(ctx->ac.builder, tmp, ctx->ac.i16, "");
+			ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] = tmp;
 		}
 	}
 }
@@ -5423,7 +5441,7 @@ static void interp_fs_input(struct nir_to_llvm_context *ctx,
 	LLVMValueRef attr_number;
 	unsigned chan;
 	LLVMValueRef i, j;
-	bool interp = interp_param != NULL;
+	bool interp = !LLVMIsUndef(interp_param);
 
 	attr_number = LLVMConstInt(ctx->ac.i32, attr, false);
 
@@ -5461,6 +5479,8 @@ static void interp_fs_input(struct nir_to_llvm_context *ctx,
 							      llvm_chan,
 							      attr_number,
 							      prim_mask);
+			result[chan] = LLVMBuildBitCast(ctx->ac.builder, result[chan], ctx->ac.i32, "");
+			result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], LLVMTypeOf(interp_param), "");
 		}
 	}
 }
@@ -5471,7 +5491,7 @@ handle_fs_input_decl(struct nir_to_llvm_context *ctx,
 {
 	int idx = variable->data.location;
 	unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
-	LLVMValueRef interp;
+	LLVMValueRef interp = NULL;
 
 	variable->data.driver_location = idx * 4;
 	ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
@@ -5487,8 +5507,11 @@ handle_fs_input_decl(struct nir_to_llvm_context *ctx,
 			interp_type = INTERP_CENTER;
 
 		interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type);
-	} else
-		interp = NULL;
+	}
+	bool is16bit = glsl_type_is_16bit(variable->type);
+	LLVMTypeRef type = is16bit ? ctx->ac.i16 : ctx->ac.i32;
+	if (interp == NULL)
+		interp = LLVMGetUndef(type);
 
 	for (unsigned i = 0; i < attrib_count; ++i)
 		ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
@@ -5541,8 +5564,10 @@ handle_fs_inputs(struct nir_to_llvm_context *ctx,
 	unsigned index = 0;
 
 	if (ctx->shader_info->info.ps.uses_input_attachments ||
-	    ctx->shader_info->info.needs_multiview_view_index)
+	    ctx->shader_info->info.needs_multiview_view_index) {
 		ctx->input_mask |= 1ull << VARYING_SLOT_LAYER;
+		ctx->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)] = LLVMGetUndef (ctx->ac.i32);
+	}
 
 	for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
 		LLVMValueRef interp_param;
@@ -5557,7 +5582,7 @@ handle_fs_inputs(struct nir_to_llvm_context *ctx,
 			interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask,
 					inputs);
 
-			if (!interp_param)
+			if (LLVMIsUndef(interp_param))
 				ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
 			++index;
 		} else if (i == VARYING_SLOT_POS) {
@@ -5687,10 +5712,13 @@ handle_shader_output_decl(struct ac_nir_context *ctx,
 		}
 	}
 
+	bool is16bit = glsl_type_is_16bit(variable->type);
+	LLVMTypeRef type = is16bit ? ctx->ac.f16 : ctx->ac.f32;
+
 	for (unsigned i = 0; i < attrib_count; ++i) {
 		for (unsigned chan = 0; chan < 4; chan++) {
 			ctx->outputs[radeon_llvm_reg_index_soa(output_loc + i, chan)] =
-		                       si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
+		                       si_build_alloca_undef(&ctx->ac, type, "");
 		}
 	}
 }
@@ -5844,6 +5872,8 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 	if (!values)
 		return;
 
+	unsigned type_size = ac_get_type_size(LLVMTypeOf(values[0]));
+
 	if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
 		LLVMValueRef val[4];
 		unsigned index = target - V_008DFC_SQ_EXP_MRT;
@@ -5876,7 +5906,11 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 
 		case V_028714_SPI_SHADER_FP16_ABGR:
 			args->compr = 1;
-
+			if (type_size == 2) {
+				for (unsigned chan = 0; chan < 4; chan++) {
+					values[chan] = LLVMBuildFPExt(ctx->ac.builder, values[chan], ctx->ac.f32, "");
+				}
+			}
 			for (unsigned chan = 0; chan < 2; chan++) {
 				LLVMValueRef pack_args[2] = {
 					values[2 * chan],
@@ -5933,6 +5967,7 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 
 			for (unsigned chan = 0; chan < 4; chan++) {
 				val[chan] = ac_to_integer(&ctx->ac, values[chan]);
+				val[chan] = LLVMBuildZExtOrBitCast(ctx->ac.builder, val[chan], ctx->ac.i32, "");
 				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], chan == 3 ? max_alpha : max_rgb);
 			}
 
@@ -5953,6 +5988,7 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 			/* Clamp. */
 			for (unsigned chan = 0; chan < 4; chan++) {
 				val[chan] = ac_to_integer(&ctx->ac, values[chan]);
+				val[chan] = LLVMBuildSExtOrBitCast(ctx->ac.builder, val[chan], ctx->ac.i32, "");
 				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], chan == 3 ? max_alpha : max_rgb);
 				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], chan == 3 ? min_alpha : min_rgb);
 			}
@@ -5968,8 +6004,15 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 			memcpy(&args->out[0], values, sizeof(values[0]) * 4);
 			break;
 		}
-	} else
+	} else {
+		if (type_size == 2) {
+			for (unsigned chan = 0; chan < 4; chan++) {
+				values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i16, "");
+				args->out[chan] = LLVMBuildZExt(ctx->ac.builder, values[chan], ctx->ac.i32, "");
+			}
+		} else
 		memcpy(&args->out[0], values, sizeof(values[0]) * 4);
+	}
 
 	for (unsigned i = 0; i < 4; ++i)
 		args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
@@ -6228,7 +6271,8 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx,
 		}
 		for (j = 0; j < length; j++) {
 			LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], "");
-			out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->ac.i32, "");
+			out_val = ac_to_integer (&ctx->ac, out_val);
+			out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
 
 			if (ctx->ac.chip_class  >= GFX9) {
 				ac_lds_store(&ctx->ac, dw_addr,
@@ -6271,8 +6315,10 @@ handle_ls_outputs_post(struct nir_to_llvm_context *ctx)
 						    LLVMConstInt(ctx->ac.i32, param * 4, false),
 						    "");
 		for (unsigned j = 0; j < length; j++) {
-			ac_lds_store(&ctx->ac, dw_addr,
-				     LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
+			LLVMValueRef value = LLVMBuildLoad(ctx->builder, out_ptr[j], "");
+			value = ac_to_integer(&ctx->ac, value);
+			value = LLVMBuildZExtOrBitCast(ctx->ac.builder, value, ctx->ac.i32, "");
+			ac_lds_store(&ctx->ac, dw_addr, value);
 			dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->ac.i32_1, "");
 		}
 	}
@@ -7240,6 +7286,12 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
 						     vtx_offset, soffset,
 						     0, 1, 1, true, false);
 
+			LLVMTypeRef type = LLVMGetAllocatedType(ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]);
+			if (ac_get_type_size(type) == 2) {
+				value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->ac.i32, "");
+				value = LLVMBuildTrunc(ctx->ac.builder, value, ctx->ac.i16, "");
+			}
+
 			LLVMBuildStore(ctx->builder,
 				       ac_to_float(&ctx->ac, value), ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]);
 		}
-- 
2.14.1



More information about the mesa-dev mailing list