[Mesa-dev] [PATCH 5/5] radeonsi: implement TGSI opcodes TEX_LZ and TXF_LZ

Marek Olšák maraeo at gmail.com
Tue Mar 7 12:32:52 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

This massively decreases VGPR spilling for DiRT Showdown, because we
no longer have to use v4i32 for 2D fetches when level == 0.
We now use v2i32 for those cases.

DiRT Showdown - Spilled VGPRs: -26 (-81%)

Surprisingly, this doesn't have any useful effect on performance (+0.05%).
---
 src/gallium/drivers/radeonsi/si_pipe.c   |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c | 20 +++++++++++++++-----
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 099ef4c..c0b61f1 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -409,20 +409,21 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
 	case PIPE_CAP_STRING_MARKER:
 	case PIPE_CAP_CLEAR_TEXTURE:
 	case PIPE_CAP_CULL_DISTANCE:
 	case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
 	case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
 	case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
 	case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
 	case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
 	case PIPE_CAP_DOUBLES:
+	case PIPE_CAP_TGSI_TEX_TXF_LZ:
 		return 1;
 
 	case PIPE_CAP_INT64:
 	case PIPE_CAP_INT64_DIVMOD:
 		return HAVE_LLVM >= 0x0309;
 
 	case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
 		return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr;
 
 	case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
@@ -475,21 +476,20 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_FAKE_SW_MSAA:
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
 	case PIPE_CAP_VERTEXID_NOBASE:
 	case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
 	case PIPE_CAP_TGSI_VOTE:
 	case PIPE_CAP_MAX_WINDOW_RECTANGLES:
 	case PIPE_CAP_NATIVE_FENCE_FD:
 	case PIPE_CAP_TGSI_FS_FBFETCH:
 	case PIPE_CAP_TGSI_MUL_ZERO_WINS:
 	case PIPE_CAP_UMA:
-	case PIPE_CAP_TGSI_TEX_TXF_LZ:
 		return 0;
 
 	case PIPE_CAP_QUERY_BUFFER_OBJECT:
 		return si_have_tgsi_compute(sscreen);
 
 	case PIPE_CAP_DRAW_PARAMETERS:
 	case PIPE_CAP_MULTI_DRAW_INDIRECT:
 	case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
 		return sscreen->has_draw_indirect_multi;
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 2c98715..1b7854e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4404,21 +4404,23 @@ static void tex_fetch_args(
 			coords[chan] = lp_build_emit_llvm_binary(bld_base,
 								 TGSI_OPCODE_DIV,
 								 coords[chan],
 								 coords[3]);
 	}
 
 	if (opcode == TGSI_OPCODE_TXP)
 		coords[3] = bld_base->base.one;
 
 	/* Pack offsets. */
-	if (has_offset && opcode != TGSI_OPCODE_TXF) {
+	if (has_offset &&
+	    opcode != TGSI_OPCODE_TXF &&
+	    opcode != TGSI_OPCODE_TXF_LZ) {
 		/* The offsets are six-bit signed integers packed like this:
 		 *   X=[5:0], Y=[13:8], and Z=[21:16].
 		 */
 		LLVMValueRef offset[3], pack;
 
 		assert(inst->Texture.NumOffsets == 1);
 
 		for (chan = 0; chan < 3; chan++) {
 			offset[chan] = lp_build_emit_fetch_texoffset(bld_base,
 								     emit_data->inst, 0, chan);
@@ -4562,22 +4564,22 @@ static void tex_fetch_args(
 	    target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
 		struct lp_build_context *uint_bld = &bld_base->uint_bld;
 		struct lp_build_emit_data txf_emit_data = *emit_data;
 		LLVMValueRef txf_address[4];
 		/* We only need .xy for non-arrays, and .xyz for arrays. */
 		unsigned txf_count = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3;
 		struct tgsi_full_instruction inst = {};
 
 		memcpy(txf_address, address, sizeof(txf_address));
 
-		/* Read FMASK using TXF. */
-		inst.Instruction.Opcode = TGSI_OPCODE_TXF;
+		/* Read FMASK using TXF_LZ. */
+		inst.Instruction.Opcode = TGSI_OPCODE_TXF_LZ;
 		inst.Texture.Texture = target;
 		txf_emit_data.inst = &inst;
 		txf_emit_data.chan = 0;
 		set_tex_fetch_args(ctx, &txf_emit_data,
 				   target, fmask_ptr, NULL,
 				   txf_address, txf_count, 0xf);
 		build_tex_intrinsic(&tex_action, bld_base, &txf_emit_data);
 
 		/* Initialize some constants. */
 		LLVMValueRef four = LLVMConstInt(ctx->i32, 4, 0);
@@ -4614,21 +4616,22 @@ static void tex_fetch_args(
 		LLVMValueRef word1_is_nonzero =
 			LLVMBuildICmp(gallivm->builder, LLVMIntNE,
 				      fmask_word1, uint_bld->zero, "");
 
 		/* Replace the MSAA sample index. */
 		address[sample_chan] =
 			LLVMBuildSelect(gallivm->builder, word1_is_nonzero,
 					final_sample, address[sample_chan], "");
 	}
 
-	if (opcode == TGSI_OPCODE_TXF) {
+	if (opcode == TGSI_OPCODE_TXF ||
+	    opcode == TGSI_OPCODE_TXF_LZ) {
 		/* add tex offsets */
 		if (inst->Texture.NumOffsets) {
 			struct lp_build_context *uint_bld = &bld_base->uint_bld;
 			const struct tgsi_texture_offset *off = inst->TexOffsets;
 
 			assert(inst->Texture.NumOffsets == 1);
 
 			switch (target) {
 			case TGSI_TEXTURE_3D:
 				address[2] = lp_build_add(uint_bld, address[2],
@@ -4776,37 +4779,42 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 	}
 
 	memcpy(&args, emit_data->args, sizeof(args)); /* ugly */
 
 	args.opcode = ac_image_sample;
 	args.compare = tgsi_is_shadow_target(target);
 	args.offset = inst->Texture.NumOffsets > 0;
 
 	switch (opcode) {
 	case TGSI_OPCODE_TXF:
-		args.opcode = target == TGSI_TEXTURE_2D_MSAA ||
+	case TGSI_OPCODE_TXF_LZ:
+		args.opcode = opcode == TGSI_OPCODE_TXF_LZ ||
+			      target == TGSI_TEXTURE_2D_MSAA ||
 			      target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
 				      ac_image_load : ac_image_load_mip;
 		args.compare = false;
 		args.offset = false;
 		break;
 	case TGSI_OPCODE_LODQ:
 		args.opcode = ac_image_get_lod;
 		args.compare = false;
 		args.offset = false;
 		break;
 	case TGSI_OPCODE_TEX:
 	case TGSI_OPCODE_TEX2:
 	case TGSI_OPCODE_TXP:
 		if (ctx->type != PIPE_SHADER_FRAGMENT)
 			args.level_zero = true;
 		break;
+	case TGSI_OPCODE_TEX_LZ:
+		args.level_zero = true;
+		break;
 	case TGSI_OPCODE_TXB:
 	case TGSI_OPCODE_TXB2:
 		assert(ctx->type == PIPE_SHADER_FRAGMENT);
 		args.bias = true;
 		break;
 	case TGSI_OPCODE_TXL:
 	case TGSI_OPCODE_TXL2:
 		args.lod = true;
 		break;
 	case TGSI_OPCODE_TXD:
@@ -6447,25 +6455,27 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
 		(shader && shader->selector) ? shader->selector->tokens : NULL);
 
 	bld_base = &ctx->bld_base;
 	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
 
 	bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
 	bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE] = interp_action;
 	bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET] = interp_action;
 
 	bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
+	bld_base->op_actions[TGSI_OPCODE_TEX_LZ] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXB2] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXD] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXF] = tex_action;
+	bld_base->op_actions[TGSI_OPCODE_TXF_LZ] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = txq_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
 	bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
 
 	bld_base->op_actions[TGSI_OPCODE_LOAD].fetch_args = load_fetch_args;
-- 
2.7.4



More information about the mesa-dev mailing list