[Mesa-dev] [PATCH 09/18] radeonsi: inline store_fetch_args

Marek Olšák maraeo at gmail.com
Sat Aug 4 07:54:48 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

---
 .../drivers/radeonsi/si_shader_tgsi_mem.c     | 103 +++++++-----------
 1 file changed, 42 insertions(+), 61 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index e7ba17048a7..f0220881995 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -567,78 +567,20 @@ static void load_emit(
 		if (inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))
 			args.cache_policy = ac_glc;
 		args.attributes = ac_get_load_intr_attribs(can_speculate);
 		args.dmask = 0xf;
 
 		emit_data->output[emit_data->chan] =
 			ac_build_image_opcode(&ctx->ac, &args);
 	}
 }
 
-static void store_fetch_args(
-		struct lp_build_tgsi_context * bld_base,
-		struct lp_build_emit_data * emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	const struct tgsi_full_instruction * inst = emit_data->inst;
-	struct tgsi_full_src_register memory;
-	LLVMValueRef chans[4];
-	LLVMValueRef data;
-	LLVMValueRef rsrc;
-	unsigned chan;
-
-	emit_data->dst_type = ctx->voidt;
-
-	for (chan = 0; chan < 4; ++chan) {
-		chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
-	}
-	data = ac_build_gather_values(&ctx->ac, chans, 4);
-
-	emit_data->args[emit_data->arg_count++] = data;
-
-	memory = tgsi_full_src_register_from_dst(&inst->Dst[0]);
-
-	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
-		LLVMValueRef offset;
-		LLVMValueRef tmp;
-
-		rsrc = shader_buffer_fetch_rsrc(ctx, &memory, false);
-
-		tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
-		offset = ac_to_integer(&ctx->ac, tmp);
-
-		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
-				   offset, false, false);
-	} else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE ||
-		   tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) {
-		unsigned target = inst->Memory.Texture;
-
-		/* 8bit/16bit TC L1 write corruption bug on SI.
-		 * All store opcodes not aligned to a dword are affected.
-		 *
-		 * The only way to get unaligned stores in radeonsi is through
-		 * shader images.
-		 */
-		bool force_glc = ctx->screen->info.chip_class == SI;
-
-		image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
-		image_fetch_coords(bld_base, inst, 0, rsrc, &emit_data->args[2]);
-
-		if (target == TGSI_TEXTURE_BUFFER) {
-			buffer_append_args(ctx, emit_data, rsrc, emit_data->args[2],
-					   ctx->i32_0, false, force_glc);
-		} else {
-			emit_data->args[1] = rsrc;
-		}
-	}
-}
-
 static void store_emit_buffer(
 		struct si_shader_context *ctx,
 		struct lp_build_emit_data *emit_data,
 		bool writeonly_memory)
 {
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	LLVMBuilderRef builder = ctx->ac.builder;
 	LLVMValueRef base_data = emit_data->args[0];
 	LLVMValueRef base_offset = emit_data->args[3];
 	unsigned writemask = inst->Dst[0].Register.WriteMask;
@@ -698,21 +640,21 @@ static void store_emit_buffer(
 		if (start != 0) {
 			offset = LLVMBuildAdd(
 				builder, offset,
 				LLVMConstInt(ctx->i32, start * 4, 0), "");
 		}
 
 		emit_data->args[0] = data;
 		emit_data->args[3] = offset;
 
 		ac_build_intrinsic(
-			&ctx->ac, intrinsic_name, emit_data->dst_type,
+			&ctx->ac, intrinsic_name, ctx->voidt,
 			emit_data->args, emit_data->arg_count,
 			ac_get_store_intr_attribs(writeonly_memory));
 	}
 }
 
 static void store_emit_memory(
 		struct si_shader_context *ctx,
 		struct lp_build_emit_data *emit_data)
 {
 	const struct tgsi_full_instruction *inst = emit_data->inst;
@@ -735,28 +677,68 @@ static void store_emit_memory(
 }
 
 static void store_emit(
 		const struct lp_build_tgsi_action *action,
 		struct lp_build_tgsi_context *bld_base,
 		struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
+	struct tgsi_full_src_register resource_reg =
+		tgsi_full_src_register_from_dst(&inst->Dst[0]);
 	unsigned target = inst->Memory.Texture;
 	bool writeonly_memory = false;
+	LLVMValueRef chans[4], rsrc;
 
 	if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
 		store_emit_memory(ctx, emit_data);
 		return;
 	}
 
+	for (unsigned chan = 0; chan < 4; ++chan)
+		chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
+
+	emit_data->args[emit_data->arg_count++] =
+		ac_build_gather_values(&ctx->ac, chans, 4);
+
+	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
+		LLVMValueRef offset, tmp;
+
+		rsrc = shader_buffer_fetch_rsrc(ctx, &resource_reg, false);
+
+		tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
+		offset = ac_to_integer(&ctx->ac, tmp);
+
+		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
+				   offset, false, false);
+	} else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE ||
+		   tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) {
+		/* 8bit/16bit TC L1 write corruption bug on SI.
+		 * All store opcodes not aligned to a dword are affected.
+		 *
+		 * The only way to get unaligned stores in radeonsi is through
+		 * shader images.
+		 */
+		bool force_glc = ctx->screen->info.chip_class == SI;
+
+		image_fetch_rsrc(bld_base, &resource_reg, true, target, &rsrc);
+		image_fetch_coords(bld_base, inst, 0, rsrc, &emit_data->args[2]);
+
+		if (target == TGSI_TEXTURE_BUFFER) {
+			buffer_append_args(ctx, emit_data, rsrc, emit_data->args[2],
+					   ctx->i32_0, false, force_glc);
+		} else {
+			emit_data->args[1] = rsrc;
+		}
+	}
+
 	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
 		ac_build_waitcnt(&ctx->ac, VM_CNT);
 
 	writeonly_memory = is_oneway_access_only(inst, info,
 						 info->shader_buffers_load |
 						 info->shader_buffers_atomic,
 						 info->images_load |
 						 info->images_atomic);
 
 	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
@@ -767,21 +749,21 @@ static void store_emit(
 	if (target == TGSI_TEXTURE_BUFFER) {
 		/* If this is write-only, don't keep data in L1 to prevent
 		 * evicting L1 cache lines that may be needed by other
 		 * instructions.
 		 */
 		if (writeonly_memory)
 			emit_data->args[4] = LLVMConstInt(ctx->i1, 1, 0); /* GLC = 1 */
 
 		emit_data->output[emit_data->chan] = ac_build_intrinsic(
 			&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32",
-			emit_data->dst_type, emit_data->args,
+			ctx->voidt, emit_data->args,
 			emit_data->arg_count,
 			ac_get_store_intr_attribs(writeonly_memory));
 	} else {
 		struct ac_image_args args = {};
 		args.opcode = ac_image_store;
 		args.data[0] = emit_data->args[0];
 		args.resource = emit_data->args[1];
 		memcpy(args.coords, &emit_data->args[2], sizeof(args.coords));
 		args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
 		args.attributes = ac_get_store_intr_attribs(writeonly_memory);
@@ -1829,21 +1811,20 @@ void si_shader_context_init_mem(struct si_shader_context *ctx)
 	bld_base->op_actions[TGSI_OPCODE_TXL2].emit = build_tex_intrinsic;
 	bld_base->op_actions[TGSI_OPCODE_TXP].emit = build_tex_intrinsic;
 	bld_base->op_actions[TGSI_OPCODE_TXQ].emit = resq_emit;
 	bld_base->op_actions[TGSI_OPCODE_TG4].emit = build_tex_intrinsic;
 	bld_base->op_actions[TGSI_OPCODE_LODQ].emit = build_tex_intrinsic;
 	bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
 
 	bld_base->op_actions[TGSI_OPCODE_FBFETCH].emit = si_llvm_emit_fbfetch;
 
 	bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
-	bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = store_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit;
 	bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
 
 	tmpl.fetch_args = atomic_fetch_args;
 	tmpl.emit = atomic_emit;
 	bld_base->op_actions[TGSI_OPCODE_ATOMUADD] = tmpl;
 	bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add";
 	bld_base->op_actions[TGSI_OPCODE_ATOMXCHG] = tmpl;
 	bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap";
 	bld_base->op_actions[TGSI_OPCODE_ATOMCAS] = tmpl;
-- 
2.17.1



More information about the mesa-dev mailing list