[Mesa-dev] [PATCH 3/3] radeonsi: set GLC=1 for all write-only shader resources

Marek Olšák maraeo at gmail.com
Fri Jul 27 03:36:42 UTC 2018


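From: Marek Olšák <marek.olsak at amd.com>

For write-only shader buffers and images, set GLC=1 on stores so that
the written data is not kept in L1, where it could evict L1 cache lines
that may be needed by other instructions.

---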
 .../drivers/radeonsi/si_shader_tgsi_mem.c     | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 427fead09d0..f5729acb8df 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -647,20 +647,27 @@ static void store_emit_buffer(
 		struct si_shader_context *ctx,
 		struct lp_build_emit_data *emit_data,
 		bool writeonly_memory)
 {
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	LLVMBuilderRef builder = ctx->ac.builder;
 	LLVMValueRef base_data = emit_data->args[0];
 	LLVMValueRef base_offset = emit_data->args[3];
 	unsigned writemask = inst->Dst[0].Register.WriteMask;
 
+	/* If this is write-only, don't keep data in L1 to prevent
+	 * evicting L1 cache lines that may be needed by other
+	 * instructions.
+	 */
+	if (writeonly_memory)
+		emit_data->args[4] = LLVMConstInt(ctx->i1, 1, 0); /* GLC = 1 */
+
 	while (writemask) {
 		int start, count;
 		const char *intrinsic_name;
 		LLVMValueRef data;
 		LLVMValueRef offset;
 		LLVMValueRef tmp;
 
 		u_bit_scan_consecutive_range(&writemask, &start, &count);
 
 		/* Due to an LLVM limitation, split 3-element writes
@@ -762,40 +769,50 @@ static void store_emit(
 						 info->shader_buffers_atomic,
 						 info->images_load |
 						 info->images_atomic);
 
 	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
 		store_emit_buffer(ctx, emit_data, writeonly_memory);
 		return;
 	}
 
 	if (target == TGSI_TEXTURE_BUFFER) {
+		/* If this is write-only, don't keep data in L1 to prevent
+		 * evicting L1 cache lines that may be needed by other
+		 * instructions.
+		 */
+		if (writeonly_memory)
+			emit_data->args[4] = LLVMConstInt(ctx->i1, 1, 0); /* GLC = 1 */
+
 		emit_data->output[emit_data->chan] = ac_build_intrinsic(
 			&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32",
 			emit_data->dst_type, emit_data->args,
 			emit_data->arg_count,
 			ac_get_store_intr_attribs(writeonly_memory));
 	} else {
 		struct ac_image_args args = {};
 		args.opcode = ac_image_store;
 		args.data[0] = emit_data->args[0];
 		args.resource = emit_data->args[1];
 		memcpy(args.coords, &emit_data->args[2], sizeof(args.coords));
 		args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
 		args.attributes = ac_get_store_intr_attribs(writeonly_memory);
 		args.dmask = 0xf;
 
 		/* Workaround for 8bit/16bit TC L1 write corruption bug on SI.
 		 * All store opcodes not aligned to a dword are affected.
 		 */
-		bool force_glc = ctx->screen->info.chip_class == SI;
-		if (force_glc ||
+		if (ctx->screen->info.chip_class == SI ||
+		    /* If this is write-only, don't keep data in L1 to prevent
+		     * evicting L1 cache lines that may be needed by other
+		     * instructions. */
+		    writeonly_memory ||
 		    inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))
 			args.cache_policy = ac_glc;
 
 		emit_data->output[emit_data->chan] =
 			ac_build_image_opcode(&ctx->ac, &args);
 	}
 }
 
 static void atomic_fetch_args(
 		struct lp_build_tgsi_context * bld_base,
-- 
2.17.1



More information about the mesa-dev mailing list