[Mesa-dev] [PATCH 1/6] radeonsi: use structured buffer intrinsics for image views

Tue Nov 20 04:09:37 UTC 2018

From: Marek Olšák <marek.olsak at amd.com>

---
 .../drivers/radeonsi/si_shader_tgsi_mem.c     | 45 ++++++++++++++++---
 src/gallium/drivers/radeonsi/si_state.c       |  7 +--
 2 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 8c44831bccb..2ba3f251ff8 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -691,31 +691,39 @@ static void store_emit(
 					     is_image, /* may_store_unaligned */
 					     writeonly_memory);
 
 	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
 		store_emit_buffer(ctx, args.resource, inst->Dst[0].Register.WriteMask,
 				  value, voffset, args.cache_policy, writeonly_memory);
 		return;
 	}
 
 	if (target == TGSI_TEXTURE_BUFFER) {
-		LLVMValueRef buf_args[] = {
+		LLVMValueRef buf_args[6] = {
 			value,
 			args.resource,
 			vindex,
 			ctx->i32_0, /* voffset */
-			LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_glc), 0),
-			LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_slc), 0),
 		};
 
+		if (HAVE_LLVM >= 0x0800) {
+			buf_args[4] = ctx->i32_0; /* soffset */
+			buf_args[5] = LLVMConstInt(ctx->i1, args.cache_policy, 0);
+		} else {
+			buf_args[4] = LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_glc), 0);
+			buf_args[5] = LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_slc), 0);
+		}
+
 		emit_data->output[emit_data->chan] = ac_build_intrinsic(
-			&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32",
+			&ctx->ac,
+			HAVE_LLVM >= 0x0800 ? "llvm.amdgcn.struct.buffer.store.format.v4f32" :
+					      "llvm.amdgcn.buffer.store.format.v4f32",
 			ctx->voidt, buf_args, 6,
 			ac_get_store_intr_attribs(writeonly_memory));
 	} else {
 		args.opcode = ac_image_store;
 		args.data[0] = value;
 		args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
 		args.attributes = ac_get_store_intr_attribs(writeonly_memory);
 		args.dmask = 0xf;
 
 		emit_data->output[emit_data->chan] =
@@ -823,25 +831,52 @@ static void atomic_emit(
 		args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
 		voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0));
 	} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
 		   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
 		image_fetch_rsrc(bld_base, &inst->Src[0], true,
 				inst->Memory.Texture, &args.resource);
 		image_fetch_coords(bld_base, inst, 1, args.resource, args.coords);
 		vindex = args.coords[0]; /* for buffers only */
 	}
 
-	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
+	if (HAVE_LLVM >= 0x0800 &&
+	    inst->Src[0].Register.File != TGSI_FILE_BUFFER &&
 	    inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
 		LLVMValueRef buf_args[7];
 		unsigned num_args = 0;
 
+		buf_args[num_args++] = args.data[0];
+		if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
+			buf_args[num_args++] = args.data[1];
+
+		buf_args[num_args++] = args.resource;
+		buf_args[num_args++] = vindex;
+		buf_args[num_args++] = voffset;
+		buf_args[num_args++] = ctx->i32_0; /* soffset */
+		buf_args[num_args++] = LLVMConstInt(ctx->i32, args.cache_policy & ac_slc, 0);
+
+		char intrinsic_name[64];
+		snprintf(intrinsic_name, sizeof(intrinsic_name),
+			 "llvm.amdgcn.struct.buffer.atomic.%s", action->intr_name);
+		emit_data->output[emit_data->chan] =
+			ac_to_float(&ctx->ac,
+				    ac_build_intrinsic(&ctx->ac, intrinsic_name,
+						       ctx->i32, buf_args, num_args, 0));
+		return;
+	}
+
+	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
+	    (HAVE_LLVM < 0x0800 &&
+	     inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) {
+		LLVMValueRef buf_args[7];
+		unsigned num_args = 0;
+
 		buf_args[num_args++] = args.data[0];
 		if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
 			buf_args[num_args++] = args.data[1];
 
 		buf_args[num_args++] = args.resource;
 		buf_args[num_args++] = vindex;
 		buf_args[num_args++] = voffset;
 		buf_args[num_args++] = args.cache_policy & ac_slc ? ctx->i1true : ctx->i1false;
 
 		char intrinsic_name[40];
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index e3b45fa6ea7..41aa4ef3336 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3606,28 +3606,25 @@ si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
 	 *       ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when
 	 *       using SMEM. This can be done in the shader by clearing STRIDE with s_and.
 	 *       That way the same descriptor can be used by both SMEM and VMEM.
 	 *
 	 * GFX9:
 	 * - For SMEM and STRIDE == 0, it's in byte units.
 	 * - For SMEM and STRIDE != 0, it's in units of STRIDE.
 	 * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units.
 	 * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE.
 	 */
-	if (screen->info.chip_class >= GFX9)
-		/* When vindex == 0, LLVM sets IDXEN = 0, thus changing units
+	if (screen->info.chip_class >= GFX9 && HAVE_LLVM < 0x0800)
+		/* When vindex == 0, LLVM < 8.0 sets IDXEN = 0, thus changing units
 		 * from STRIDE to bytes. This works around it by setting
 		 * NUM_RECORDS to at least the size of one element, so that
 		 * the first element is readable when IDXEN == 0.
-		 *
-		 * TODO: Fix this in LLVM, but do we need a new intrinsic where
-		 *       IDXEN is enforced?
 		 */
 		num_records = num_records ? MAX2(num_records, stride) : 0;
 	else if (screen->info.chip_class == VI)
 		num_records *= stride;
 
 	state[4] = 0;
 	state[5] = S_008F04_STRIDE(stride);
 	state[6] = num_records;
 	state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
 		   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
-- 
2.17.1