[Mesa-dev] [PATCH 5/5] amd/common: use the dimension-aware image intrinsics on LLVM 7+

Wed Apr 11 11:13:57 UTC 2018

From: Nicolai Hähnle <nicolai.haehnle at amd.com>

Requires LLVM trunk r329166.
---
 src/amd/common/ac_llvm_build.c | 189 +++++++++++++++++++++++++++++++++++------
 1 file changed, 165 insertions(+), 24 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index b5561533437..15768d1d26d 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1462,39 +1462,47 @@ static unsigned ac_num_derivs(enum ac_image_dim dim)
 		return 4;
 	case ac_image_3d:
 		return 6;
 	case ac_image_2dmsaa:
 	case ac_image_2darraymsaa:
 	default:
 		unreachable("derivatives not supported");
 	}
 }
 
-LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
-				   struct ac_image_args *a)
+static const char *get_atomic_name(enum ac_atomic_op op)
+{
+	switch (op) {
+	case ac_atomic_swap: return "swap";
+	case ac_atomic_add: return "add";
+	case ac_atomic_sub: return "sub";
+	case ac_atomic_smin: return "smin";
+	case ac_atomic_umin: return "umin";
+	case ac_atomic_smax: return "smax";
+	case ac_atomic_umax: return "umax";
+	case ac_atomic_and: return "and";
+	case ac_atomic_or: return "or";
+	case ac_atomic_xor: return "xor";
+	}
+	unreachable("bad atomic op");
+}
+
+/* LLVM 6 and older */
+static LLVMValueRef ac_build_image_opcode_llvm6(struct ac_llvm_context *ctx,
+						struct ac_image_args *a)
 {
 	LLVMValueRef args[16];
 	LLVMTypeRef retty = ctx->v4f32;
 	const char *name = NULL;
 	const char *atomic_subop = "";
 	char intr_name[128], coords_type[64];
 
-	assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 ||
-	       !a->level_zero);
-	assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip &&
-		a->opcode != ac_image_store_mip) ||
-	       a->lod);
-	assert((a->bias ? 1 : 0) +
-	       (a->lod ? 1 : 0) +
-	       (a->level_zero ? 1 : 0) +
-	       (a->derivs[0] ? 1 : 0) <= 1);
-
 	bool sample = a->opcode == ac_image_sample ||
 		      a->opcode == ac_image_gather4 ||
 		      a->opcode == ac_image_get_lod;
 	bool atomic = a->opcode == ac_image_atomic ||
 		      a->opcode == ac_image_atomic_cmpswap;
 	bool da = a->dim == ac_image_cube ||
 		  a->dim == ac_image_1darray ||
 		  a->dim == ac_image_2darray ||
 		  a->dim == ac_image_2darraymsaa;
 	if (a->opcode == ac_image_get_lod)
@@ -1578,32 +1586,21 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 		name = "llvm.amdgcn.image.store.mip";
 		retty = ctx->voidt;
 		break;
 	case ac_image_atomic:
 	case ac_image_atomic_cmpswap:
 		name = "llvm.amdgcn.image.atomic.";
 		retty = ctx->i32;
 		if (a->opcode == ac_image_atomic_cmpswap) {
 			atomic_subop = "cmpswap";
 		} else {
-			switch (a->atomic) {
-			case ac_atomic_swap: atomic_subop = "swap"; break;
-			case ac_atomic_add: atomic_subop = "add"; break;
-			case ac_atomic_sub: atomic_subop = "sub"; break;
-			case ac_atomic_smin: atomic_subop = "smin"; break;
-			case ac_atomic_umin: atomic_subop = "umin"; break;
-			case ac_atomic_smax: atomic_subop = "smax"; break;
-			case ac_atomic_umax: atomic_subop = "umax"; break;
-			case ac_atomic_and: atomic_subop = "and"; break;
-			case ac_atomic_or: atomic_subop = "or"; break;
-			case ac_atomic_xor: atomic_subop = "xor"; break;
-			}
+			atomic_subop = get_atomic_name(a->atomic);
 		}
 		break;
 	case ac_image_get_lod:
 		name = "llvm.amdgcn.image.getlod";
 		break;
 	case ac_image_get_resinfo:
 		name = "llvm.amdgcn.image.getresinfo";
 		break;
 	default:
 		unreachable("invalid image opcode");
@@ -1633,20 +1630,164 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 	LLVMValueRef result =
 		ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
 				   a->attributes);
 	if (!sample && retty == ctx->v4f32) {
 		result = LLVMBuildBitCast(ctx->builder, result,
 					  ctx->v4i32, "");
 	}
 	return result;
 }
 
+LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
+				   struct ac_image_args *a)
+{
+	const char *overload[3] = { "", "", "" };
+	unsigned num_overloads = 0;
+	LLVMValueRef args[18];
+	unsigned num_args = 0;
+
+	assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 ||
+	       !a->level_zero);
+	assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip &&
+		a->opcode != ac_image_store_mip) ||
+	       a->lod);
+	assert(a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
+	       (!a->compare && !a->offset));
+	assert((a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
+		a->opcode == ac_image_get_lod) ||
+	       !a->bias);
+	assert((a->bias ? 1 : 0) +
+	       (a->lod ? 1 : 0) +
+	       (a->level_zero ? 1 : 0) +
+	       (a->derivs[0] ? 1 : 0) <= 1);
+
+	if (HAVE_LLVM < 0x0700)
+		return ac_build_image_opcode_llvm6(ctx, a);
+
+	bool sample = a->opcode == ac_image_sample ||
+		      a->opcode == ac_image_gather4 ||
+		      a->opcode == ac_image_get_lod;
+	bool atomic = a->opcode == ac_image_atomic ||
+		      a->opcode == ac_image_atomic_cmpswap;
+	LLVMTypeRef coord_type = sample ? ctx->f32 : ctx->i32;
+
+	if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
+		args[num_args++] = a->data[0];
+		if (a->opcode == ac_image_atomic_cmpswap)
+			args[num_args++] = a->data[1];
+	}
+
+	if (!atomic)
+		args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, false);
+
+	if (a->offset)
+		args[num_args++] = ac_to_integer(ctx, a->offset);
+	if (a->bias) {
+		args[num_args++] = ac_to_float(ctx, a->bias);
+		overload[num_overloads++] = ".f32";
+	}
+	if (a->compare)
+		args[num_args++] = ac_to_float(ctx, a->compare);
+	if (a->derivs[0]) {
+		unsigned count = ac_num_derivs(a->dim);
+		for (unsigned i = 0; i < count; ++i)
+			args[num_args++] = ac_to_float(ctx, a->derivs[i]);
+		overload[num_overloads++] = ".f32";
+	}
+	unsigned num_coords =
+		a->opcode != ac_image_get_resinfo ? ac_num_coords(a->dim) : 0;
+	for (unsigned i = 0; i < num_coords; ++i)
+		args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, "");
+	if (a->lod)
+		args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, "");
+	overload[num_overloads++] = sample ? ".f32" : ".i32";
+
+	args[num_args++] = a->resource;
+	if (sample) {
+		args[num_args++] = a->sampler;
+		args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, false);
+	}
+
+	args[num_args++] = ctx->i32_0; /* texfailctrl */
+	args[num_args++] = LLVMConstInt(ctx->i32, a->cache_policy, false);
+
+	const char *name;
+	const char *atomic_subop = "";
+	switch (a->opcode) {
+	case ac_image_sample: name = "sample"; break;
+	case ac_image_gather4: name = "gather4"; break;
+	case ac_image_load: name = "load"; break;
+	case ac_image_load_mip: name = "load.mip"; break;
+	case ac_image_store: name = "store"; break;
+	case ac_image_store_mip: name = "store.mip"; break;
+	case ac_image_atomic:
+		name = "atomic.";
+		atomic_subop = get_atomic_name(a->atomic);
+		break;
+	case ac_image_atomic_cmpswap:
+		name = "atomic.";
+		atomic_subop = "cmpswap";
+		break;
+	case ac_image_get_lod: name = "getlod"; break;
+	case ac_image_get_resinfo: name = "getresinfo"; break;
+	default: unreachable("invalid image opcode");
+	}
+
+	const char *dimname;
+	switch (a->dim) {
+	case ac_image_1d: dimname = "1d"; break;
+	case ac_image_2d: dimname = "2d"; break;
+	case ac_image_3d: dimname = "3d"; break;
+	case ac_image_cube: dimname = "cube"; break;
+	case ac_image_1darray: dimname = "1darray"; break;
+	case ac_image_2darray: dimname = "2darray"; break;
+	case ac_image_2dmsaa: dimname = "2dmsaa"; break;
+	case ac_image_2darraymsaa: dimname = "2darraymsaa"; break;
+	default: unreachable("invalid dim");
+	}
+
+	bool lod_suffix =
+		a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
+	char intr_name[96];
+	snprintf(intr_name, sizeof(intr_name),
+		 "llvm.amdgcn.image.%s%s" /* base name */
+		 "%s%s%s" /* sample/gather modifiers */
+		 ".%s.%s%s%s%s", /* dimension and type overloads */
+		 name, atomic_subop,
+		 a->compare ? ".c" : "",
+		 a->bias ? ".b" :
+		 lod_suffix ? ".l" :
+		 a->derivs[0] ? ".d" :
+		 a->level_zero ? ".lz" : "",
+		 a->offset ? ".o" : "",
+		 dimname,
+		 atomic ? "i32" : "v4f32",
+		 overload[0], overload[1], overload[2]);
+
+	LLVMTypeRef retty;
+	if (atomic)
+		retty = ctx->i32;
+	else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip)
+		retty = ctx->voidt;
+	else
+		retty = ctx->v4f32;
+
+	LLVMValueRef result =
+		ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
+				   a->attributes);
+	if (!sample && retty == ctx->v4f32) {
+		result = LLVMBuildBitCast(ctx->builder, result,
+					  ctx->v4i32, "");
+	}
+	return result;
+}
+
 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
 				    LLVMValueRef args[2])
 {
 	if (HAVE_LLVM >= 0x0500) {
 		LLVMTypeRef v2f16 =
 			LLVMVectorType(LLVMHalfTypeInContext(ctx->context), 2);
 		LLVMValueRef res =
 			ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz",
 					   v2f16, args, 2,
 					   AC_FUNC_ATTR_READNONE);
-- 
2.14.1